PROJECT OBJECTIVE: Demonstrate the ability to fetch, process and leverage data to generate useful predictions by training Supervised Learning algorithms. --> KNN to be used as suggested in the question.
DATA DESCRIPTION: The data consists of biomechanics features of the patients according to their current conditions. Each patient is represented in the data set by six biomechanics attributes derived from the shape and orientation of the condition to their body part.
# Importing packages - Pandas, Numpy, Seaborn, Scipy
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, sys
import matplotlib.style as style; style.use('fivethirtyeight')
from scipy.stats import zscore, norm
from sklearn.preprocessing import LabelEncoder
# Importing packages for KNN(supervised learning)
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
# Suppress warnings
import warnings; warnings.filterwarnings('ignore')
pd.options.display.max_rows = 4000
# importing dataset onto python
# 1st set
Normal = pd.read_csv('Part1 - Normal.csv')
Normal.head()
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 0 | 38.505273 | 16.964297 | 35.112814 | 21.540976 | 127.632875 | 7.986683 | Normal |
| 1 | 54.920858 | 18.968430 | 51.601455 | 35.952428 | 125.846646 | 2.001642 | Normal |
| 2 | 44.362490 | 8.945435 | 46.902096 | 35.417055 | 129.220682 | 4.994195 | Normal |
| 3 | 48.318931 | 17.452121 | 48.000000 | 30.866809 | 128.980308 | -0.910941 | Normal |
| 4 | 45.701789 | 10.659859 | 42.577846 | 35.041929 | 130.178314 | -3.388910 | Normal |
Normal.shape
(100, 7)
#2nd dataset
typeH = pd.read_csv('Part1 - Type_H.csv')
typeH.head()
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 0 | 63.027818 | 22.552586 | 39.609117 | 40.475232 | 98.672917 | -0.254400 | Type_H |
| 1 | 39.056951 | 10.060991 | 25.015378 | 28.995960 | 114.405425 | 4.564259 | Type_H |
| 2 | 68.832021 | 22.218482 | 50.092194 | 46.613539 | 105.985135 | -3.530317 | Type_H |
| 3 | 69.297008 | 24.652878 | 44.311238 | 44.644130 | 101.868495 | 11.211523 | Type_H |
| 4 | 49.712859 | 9.652075 | 28.317406 | 40.060784 | 108.168725 | 7.918501 | Type_H |
typeH.shape
(60, 7)
# 3rd dataset
typeS = pd.read_csv('Part1 - Type_S.csv')
typeS.head()
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 0 | 74.377678 | 32.053104 | 78.772013 | 42.324573 | 143.560690 | 56.125906 | Type_S |
| 1 | 89.680567 | 32.704435 | 83.130732 | 56.976132 | 129.955476 | 92.027277 | Type_S |
| 2 | 44.529051 | 9.433234 | 52.000000 | 35.095817 | 134.711772 | 29.106575 | Type_S |
| 3 | 77.690577 | 21.380645 | 64.429442 | 56.309932 | 114.818751 | 26.931841 | Type_S |
| 4 | 76.147212 | 21.936186 | 82.961502 | 54.211027 | 123.932010 | 10.431972 | Type_S |
typeS.shape
(150, 7)
# Merging all 3 datasets onto one.
Final_set = [Normal,typeH,typeS]
final_set = pd.concat(Final_set)
final_set.shape
#100+60+150
(310, 7)
final_set.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 310 entries, 0 to 149 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 P_incidence 310 non-null float64 1 P_tilt 310 non-null float64 2 L_angle 310 non-null float64 3 S_slope 310 non-null float64 4 P_radius 310 non-null float64 5 S_Degree 310 non-null float64 6 Class 310 non-null object dtypes: float64(6), object(1) memory usage: 19.4+ KB
final_set.head(10)
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 0 | 38.505273 | 16.964297 | 35.112814 | 21.540976 | 127.632875 | 7.986683 | Normal |
| 1 | 54.920858 | 18.968430 | 51.601455 | 35.952428 | 125.846646 | 2.001642 | Normal |
| 2 | 44.362490 | 8.945435 | 46.902096 | 35.417055 | 129.220682 | 4.994195 | Normal |
| 3 | 48.318931 | 17.452121 | 48.000000 | 30.866809 | 128.980308 | -0.910941 | Normal |
| 4 | 45.701789 | 10.659859 | 42.577846 | 35.041929 | 130.178314 | -3.388910 | Normal |
| 5 | 30.741938 | 13.354966 | 35.903526 | 17.386972 | 142.410107 | -2.005373 | Normal |
| 6 | 50.913101 | 6.677000 | 30.896522 | 44.236102 | 118.151531 | -1.057986 | Normal |
| 7 | 38.126589 | 6.557617 | 50.445075 | 31.568971 | 132.114805 | 6.338199 | Normal |
| 8 | 51.624672 | 15.969344 | 35.000000 | 35.655328 | 129.385308 | 1.009228 | Nrmal |
| 9 | 64.311867 | 26.328369 | 50.958964 | 37.983498 | 106.177751 | 3.118221 | Nrmal |
final_set.tail(10)
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 140 | 63.364339 | 20.024621 | 67.498705 | 43.339718 | 130.999258 | 37.556706 | Type_S |
| 141 | 67.513053 | 33.275590 | 96.283062 | 34.237463 | 145.601033 | 88.301486 | Type_S |
| 142 | 76.314028 | 41.933683 | 93.284863 | 34.380345 | 132.267285 | 101.218783 | Type_S |
| 143 | 73.635962 | 9.711318 | 63.000000 | 63.924644 | 98.727930 | 26.975787 | Type_S |
| 144 | 56.535051 | 14.377189 | 44.991547 | 42.157862 | 101.723334 | 25.773174 | Type_S |
| 145 | 80.111572 | 33.942432 | 85.101608 | 46.169139 | 125.593624 | 100.292107 | Type_S |
| 146 | 95.480229 | 46.550053 | 59.000000 | 48.930176 | 96.683903 | 77.283072 | Type_S |
| 147 | 74.094731 | 18.823727 | 76.032156 | 55.271004 | 128.405731 | 73.388216 | Type_S |
| 148 | 87.679087 | 20.365613 | 93.822416 | 67.313473 | 120.944829 | 76.730629 | Type_S |
| 149 | 48.259920 | 16.417462 | 36.329137 | 31.842457 | 94.882336 | 28.343799 | Type_S |
final_set.Class.value_counts()
Type_S 133 Normal 73 Type_H 37 Nrmal 27 type_h 23 tp_s 17 Name: Class, dtype: int64
final_set.isnull().value_counts()
P_incidence P_tilt L_angle S_slope P_radius S_Degree Class False False False False False False False 310 dtype: int64
# correcting CLASS column
final_set = final_set.replace(to_replace='Nrmal',value='Normal',regex=True)
final_set = final_set.replace(to_replace='type_h',value='Type_H',regex=True)
final_set = final_set.replace(to_replace='tp_s',value='Type_S',regex=True)
final_set.Class.value_counts().to_frame()
| Class | |
|---|---|
| Type_S | 150 |
| Normal | 100 |
| Type_H | 60 |
final_set.all().to_frame()
| 0 | |
|---|---|
| P_incidence | True |
| P_tilt | True |
| L_angle | True |
| S_slope | True |
| P_radius | True |
| S_Degree | True |
| Class | True |
final_set.Class.value_counts(normalize=True)*100
Type_S 48.387097 Normal 32.258065 Type_H 19.354839 Name: Class, dtype: float64
final_set.reset_index(inplace=True)
final_set
| index | P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 38.505273 | 16.964297 | 35.112814 | 21.540976 | 127.632875 | 7.986683 | Normal |
| 1 | 1 | 54.920858 | 18.968430 | 51.601455 | 35.952428 | 125.846646 | 2.001642 | Normal |
| 2 | 2 | 44.362490 | 8.945435 | 46.902096 | 35.417055 | 129.220682 | 4.994195 | Normal |
| 3 | 3 | 48.318931 | 17.452121 | 48.000000 | 30.866809 | 128.980308 | -0.910941 | Normal |
| 4 | 4 | 45.701789 | 10.659859 | 42.577846 | 35.041929 | 130.178314 | -3.388910 | Normal |
| 5 | 5 | 30.741938 | 13.354966 | 35.903526 | 17.386972 | 142.410107 | -2.005373 | Normal |
| 6 | 6 | 50.913101 | 6.677000 | 30.896522 | 44.236102 | 118.151531 | -1.057986 | Normal |
| 7 | 7 | 38.126589 | 6.557617 | 50.445075 | 31.568971 | 132.114805 | 6.338199 | Normal |
| 8 | 8 | 51.624672 | 15.969344 | 35.000000 | 35.655328 | 129.385308 | 1.009228 | Normal |
| 9 | 9 | 64.311867 | 26.328369 | 50.958964 | 37.983498 | 106.177751 | 3.118221 | Normal |
| 10 | 10 | 44.489275 | 21.786433 | 31.474154 | 22.702842 | 113.778494 | -0.284129 | Normal |
| 11 | 11 | 54.950970 | 5.865353 | 53.000000 | 49.085617 | 126.970328 | -0.631603 | Normal |
| 12 | 12 | 56.103774 | 13.106307 | 62.637020 | 42.997467 | 116.228503 | 31.172767 | Normal |
| 13 | 13 | 69.398818 | 18.898407 | 75.966361 | 50.500411 | 103.582540 | -0.443661 | Normal |
| 14 | 14 | 89.834676 | 22.639217 | 90.563461 | 67.195460 | 100.501192 | 3.040973 | Normal |
| 15 | 15 | 59.726140 | 7.724873 | 55.343485 | 52.001268 | 125.174221 | 3.235159 | Normal |
| 16 | 16 | 63.959522 | 16.060945 | 63.123736 | 47.898577 | 142.360125 | 6.298971 | Normal |
| 17 | 17 | 61.540599 | 19.676957 | 52.892229 | 41.863642 | 118.686268 | 4.815031 | Normal |
| 18 | 18 | 38.046551 | 8.301669 | 26.236830 | 29.744881 | 123.803413 | 3.885773 | Normal |
| 19 | 19 | 43.436451 | 10.095743 | 36.032224 | 33.340707 | 137.439694 | -3.114451 | Normal |
| 20 | 20 | 65.611802 | 23.137919 | 62.582179 | 42.473883 | 124.128001 | -4.083298 | Normal |
| 21 | 21 | 53.911054 | 12.939318 | 39.000000 | 40.971736 | 118.193035 | 5.074353 | Normal |
| 22 | 22 | 43.117951 | 13.815744 | 40.347388 | 29.302207 | 128.517722 | 0.970926 | Normal |
| 23 | 23 | 40.683229 | 9.148437 | 31.021593 | 31.534792 | 139.118472 | -2.511619 | Normal |
| 24 | 24 | 37.731992 | 9.386298 | 42.000000 | 28.345694 | 135.740926 | 13.683047 | Normal |
| 25 | 25 | 63.929470 | 19.971097 | 40.177050 | 43.958373 | 113.065939 | -11.058179 | Normal |
| 26 | 26 | 61.821627 | 13.597105 | 64.000000 | 48.224523 | 121.779803 | 1.296191 | Normal |
| 27 | 27 | 62.140805 | 13.960975 | 58.000000 | 48.179830 | 133.281834 | 4.955106 | Normal |
| 28 | 28 | 69.004913 | 13.291790 | 55.570143 | 55.713123 | 126.611622 | 10.832011 | Normal |
| 29 | 29 | 56.447026 | 19.444499 | 43.577846 | 37.002527 | 139.189690 | -1.859689 | Normal |
| 30 | 30 | 41.646916 | 8.835549 | 36.031975 | 32.811367 | 116.555168 | -6.054538 | Normal |
| 31 | 31 | 51.529358 | 13.517847 | 35.000000 | 38.011510 | 126.718516 | 13.928331 | Normal |
| 32 | 32 | 39.087264 | 5.536602 | 26.932038 | 33.550662 | 131.584420 | -0.759461 | Normal |
| 33 | 33 | 34.649922 | 7.514783 | 43.000000 | 27.135140 | 123.987741 | -4.082938 | Normal |
| 34 | 34 | 63.026300 | 27.336240 | 51.605017 | 35.690060 | 114.506608 | 7.439870 | Normal |
| 35 | 35 | 47.805559 | 10.688698 | 54.000000 | 37.116861 | 125.391138 | -0.402523 | Normal |
| 36 | 36 | 46.637864 | 15.853717 | 40.000000 | 30.784147 | 119.377603 | 9.064582 | Normal |
| 37 | 37 | 49.828135 | 16.736435 | 28.000000 | 33.091700 | 121.435558 | 1.913307 | Normal |
| 38 | 38 | 47.319648 | 8.573680 | 35.560252 | 38.745967 | 120.576972 | 1.630664 | Normal |
| 39 | 39 | 50.753290 | 20.235060 | 37.000000 | 30.518231 | 122.343516 | 2.288488 | Normal |
| 40 | 40 | 36.157830 | -0.810514 | 33.627314 | 36.968344 | 135.936910 | -2.092507 | Normal |
| 41 | 41 | 40.746996 | 1.835524 | 50.000000 | 38.911472 | 139.247150 | 0.668557 | Normal |
| 42 | 42 | 42.918041 | -5.845994 | 58.000000 | 48.764035 | 121.606859 | -3.362045 | Normal |
| 43 | 43 | 63.792425 | 21.345323 | 66.000000 | 42.447102 | 119.550391 | 12.382604 | Normal |
| 44 | 44 | 72.955644 | 19.576971 | 61.007071 | 53.378673 | 111.234047 | 0.813491 | Normal |
| 45 | 45 | 67.538182 | 14.655042 | 58.001429 | 52.883139 | 123.632260 | 25.970206 | Normal |
| 46 | 46 | 54.752520 | 9.752520 | 48.000000 | 45.000000 | 123.037999 | 8.235294 | Normal |
| 47 | 47 | 50.160078 | -2.970024 | 42.000000 | 53.130102 | 131.802491 | -8.290203 | Normal |
| 48 | 48 | 40.349296 | 10.194748 | 37.967747 | 30.154548 | 128.009927 | 0.458901 | Normal |
| 49 | 49 | 63.619192 | 16.934508 | 49.349262 | 46.684684 | 117.089747 | -0.357812 | Normal |
| 50 | 50 | 54.142408 | 11.935110 | 43.000000 | 42.207298 | 122.209083 | 0.153549 | Normal |
| 51 | 51 | 74.976021 | 14.921705 | 53.730072 | 60.054317 | 105.645400 | 1.594748 | Normal |
| 52 | 52 | 42.517272 | 14.375671 | 25.323565 | 28.141601 | 128.905689 | 0.757020 | Normal |
| 53 | 53 | 33.788843 | 3.675110 | 25.500000 | 30.113733 | 128.325356 | -1.776111 | Normal |
| 54 | 54 | 54.503685 | 6.819910 | 47.000000 | 47.683775 | 111.791172 | -4.406769 | Normal |
| 55 | 55 | 48.170746 | 9.594217 | 39.710920 | 38.576530 | 135.623310 | 5.360051 | Normal |
| 56 | 56 | 46.374088 | 10.215902 | 42.700000 | 36.158185 | 121.247657 | -0.542022 | Normal |
| 57 | 57 | 52.862214 | 9.410372 | 46.988052 | 43.451842 | 123.091240 | 1.856659 | Normal |
| 58 | 58 | 57.145851 | 16.489091 | 42.842148 | 40.656760 | 113.806177 | 5.015186 | Normal |
| 59 | 59 | 37.140150 | 16.481240 | 24.000000 | 20.658910 | 125.014361 | 7.366425 | Normal |
| 60 | 60 | 51.311771 | 8.875541 | 57.000000 | 42.436230 | 126.472258 | -2.144044 | Normal |
| 61 | 61 | 42.515610 | 16.541216 | 42.000000 | 25.974394 | 120.631941 | 7.876731 | Normal |
| 62 | 62 | 39.358705 | 7.011262 | 37.000000 | 32.347443 | 117.818760 | 1.904048 | Normal |
| 63 | 63 | 35.877571 | 1.112374 | 43.457257 | 34.765197 | 126.923906 | -1.632238 | Normal |
| 64 | 64 | 43.191915 | 9.976664 | 28.938149 | 33.215251 | 123.467400 | 1.741018 | Normal |
| 65 | 65 | 67.289712 | 16.717514 | 51.000000 | 50.572198 | 137.591778 | 4.960344 | Normal |
| 66 | 66 | 51.325464 | 13.631223 | 33.258578 | 37.694240 | 131.306122 | 1.788870 | Normal |
| 67 | 67 | 65.756348 | 13.206926 | 44.000000 | 52.549422 | 129.393573 | -1.982120 | Normal |
| 68 | 68 | 40.413366 | -1.329412 | 30.982768 | 41.742778 | 119.335655 | -6.173675 | Normal |
| 69 | 69 | 48.801909 | 18.017762 | 52.000000 | 30.784147 | 139.150407 | 10.442862 | Normal |
| 70 | 70 | 50.086153 | 13.430044 | 34.457541 | 36.656108 | 119.134622 | 3.089484 | Normal |
| 71 | 71 | 64.261507 | 14.497866 | 43.902504 | 49.763642 | 115.388268 | 5.951454 | Normal |
| 72 | 72 | 53.683380 | 13.447022 | 41.584297 | 40.236358 | 113.913703 | 2.737035 | Normal |
| 73 | 73 | 48.995958 | 13.113820 | 51.873520 | 35.882137 | 126.398188 | 0.535472 | Normal |
| 74 | 74 | 59.167612 | 14.562749 | 43.199158 | 44.604863 | 121.035642 | 2.830504 | Normal |
| 75 | 75 | 67.804694 | 16.550662 | 43.256802 | 51.254033 | 119.685645 | 4.867540 | Normal |
| 76 | 76 | 61.734875 | 17.114312 | 46.900000 | 44.620563 | 120.920200 | 3.087726 | Normal |
| 77 | 77 | 33.041688 | -0.324678 | 19.071075 | 33.366366 | 120.388611 | 9.354365 | Normal |
| 78 | 78 | 74.565015 | 15.724320 | 58.618582 | 58.840695 | 105.417304 | 0.599247 | Normal |
| 79 | 79 | 44.430701 | 14.174264 | 32.243495 | 30.256437 | 131.717613 | -3.604255 | Normal |
| 80 | 80 | 36.422485 | 13.879424 | 20.242562 | 22.543061 | 126.076861 | 0.179717 | Normal |
| 81 | 81 | 51.079833 | 14.209935 | 35.951229 | 36.869898 | 115.803711 | 6.905090 | Normal |
| 82 | 82 | 34.756738 | 2.631740 | 29.504381 | 32.124998 | 127.139850 | -0.460894 | Normal |
| 83 | 83 | 48.902904 | 5.587589 | 55.500000 | 43.315316 | 137.108289 | 19.854759 | Normal |
| 84 | 84 | 46.236399 | 10.062770 | 37.000000 | 36.173629 | 128.063620 | -5.100053 | Normal |
| 85 | 85 | 46.426366 | 6.620795 | 48.100000 | 39.805571 | 130.350096 | 2.449382 | Normal |
| 86 | 86 | 39.656902 | 16.208839 | 36.674857 | 23.448063 | 131.922009 | -4.968980 | Normal |
| 87 | 87 | 45.575482 | 18.759135 | 33.774143 | 26.816347 | 116.797007 | 3.131910 | Normal |
| 88 | 88 | 66.507179 | 20.897672 | 31.727471 | 45.609507 | 128.902905 | 1.517203 | Normal |
| 89 | 89 | 82.905351 | 29.894119 | 58.250542 | 53.011232 | 110.708958 | 6.079338 | Normal |
| 90 | 90 | 50.676677 | 6.461501 | 35.000000 | 44.215175 | 116.587970 | -0.214711 | Normal |
| 91 | 91 | 89.014875 | 26.075981 | 69.021259 | 62.938894 | 111.481075 | 6.061508 | Normal |
| 92 | 92 | 54.600316 | 21.488974 | 29.360216 | 33.111342 | 118.343321 | -1.471067 | Normal |
| 93 | 93 | 34.382299 | 2.062683 | 32.390820 | 32.319617 | 128.300199 | -3.365516 | Normal |
| 94 | 94 | 45.075450 | 12.306951 | 44.583177 | 32.768499 | 147.894637 | -8.941709 | Normal |
| 95 | 95 | 47.903565 | 13.616688 | 36.000000 | 34.286877 | 117.449062 | -4.245395 | Normal |
| 96 | 96 | 53.936748 | 20.721496 | 29.220534 | 33.215251 | 114.365845 | -0.421010 | Normal |
| 97 | 97 | 61.446597 | 22.694968 | 46.170347 | 38.751628 | 125.670725 | -2.707880 | Normal |
| 98 | 98 | 45.252792 | 8.693157 | 41.583126 | 36.559635 | 118.545842 | 0.214750 | Normal |
| 99 | 99 | 33.841641 | 5.073991 | 36.641233 | 28.767649 | 123.945244 | -0.199249 | Normal |
| 100 | 0 | 63.027818 | 22.552586 | 39.609117 | 40.475232 | 98.672917 | -0.254400 | Type_H |
| 101 | 1 | 39.056951 | 10.060991 | 25.015378 | 28.995960 | 114.405425 | 4.564259 | Type_H |
| 102 | 2 | 68.832021 | 22.218482 | 50.092194 | 46.613539 | 105.985135 | -3.530317 | Type_H |
| 103 | 3 | 69.297008 | 24.652878 | 44.311238 | 44.644130 | 101.868495 | 11.211523 | Type_H |
| 104 | 4 | 49.712859 | 9.652075 | 28.317406 | 40.060784 | 108.168725 | 7.918501 | Type_H |
| 105 | 5 | 40.250200 | 13.921907 | 25.124950 | 26.328293 | 130.327871 | 2.230652 | Type_H |
| 106 | 6 | 53.432928 | 15.864336 | 37.165934 | 37.568592 | 120.567523 | 5.988551 | Type_H |
| 107 | 7 | 45.366754 | 10.755611 | 29.038349 | 34.611142 | 117.270068 | -10.675871 | Type_H |
| 108 | 8 | 43.790190 | 13.533753 | 42.690814 | 30.256437 | 125.002893 | 13.289018 | Type_H |
| 109 | 9 | 36.686353 | 5.010884 | 41.948751 | 31.675469 | 84.241415 | 0.664437 | Type_H |
| 110 | 10 | 49.706610 | 13.040974 | 31.334500 | 36.665635 | 108.648265 | -7.825986 | Type_H |
| 111 | 11 | 31.232387 | 17.715819 | 15.500000 | 13.516568 | 120.055399 | 0.499751 | Type_H |
| 112 | 12 | 48.915551 | 19.964556 | 40.263794 | 28.950995 | 119.321358 | 8.028895 | Type_H |
| 113 | 13 | 53.572170 | 20.460828 | 33.100000 | 33.111342 | 110.966698 | 7.044803 | Type_H |
| 114 | 14 | 57.300227 | 24.188885 | 47.000000 | 33.111342 | 116.806587 | 5.766947 | Type_H |
| 115 | 15 | 44.318907 | 12.537992 | 36.098763 | 31.780915 | 124.115836 | 5.415825 | Type_H |
| 116 | 16 | 63.834982 | 20.362507 | 54.552434 | 43.472475 | 112.309492 | -0.622527 | Type_H |
| 117 | 17 | 31.276012 | 3.144669 | 32.562996 | 28.131342 | 129.011418 | 3.623020 | Type_H |
| 118 | 18 | 38.697912 | 13.444749 | 31.000000 | 25.253163 | 123.159251 | 1.429186 | Type_H |
| 119 | 19 | 41.729963 | 12.254074 | 30.122586 | 29.475889 | 116.585706 | -1.244402 | Type_H |
| 120 | 20 | 43.922840 | 14.177959 | 37.832547 | 29.744881 | 134.461016 | 6.451648 | Type_H |
| 121 | 21 | 54.919443 | 21.062332 | 42.200000 | 33.857110 | 125.212716 | 2.432561 | Type_H |
| 122 | 22 | 63.073611 | 24.413803 | 54.000000 | 38.659808 | 106.424329 | 15.779697 | Type_H |
| 123 | 23 | 45.540790 | 13.069598 | 30.298321 | 32.471192 | 117.980830 | -4.987130 | Type_H |
| 124 | 24 | 36.125683 | 22.758753 | 29.000000 | 13.366931 | 115.577116 | -3.237562 | Type_H |
| 125 | 25 | 54.124920 | 26.650489 | 35.329747 | 27.474432 | 121.447011 | 1.571205 | Type_H |
| 126 | 26 | 26.147921 | 10.759454 | 14.000000 | 15.388468 | 125.203296 | -10.093108 | Type_H |
| 127 | 27 | 43.580964 | 16.508884 | 47.000000 | 27.072080 | 109.271634 | 8.992816 | Type_H |
| 128 | 28 | 44.551012 | 21.931147 | 26.785916 | 22.619865 | 111.072920 | 2.652321 | Type_H |
| 129 | 29 | 66.879211 | 24.891999 | 49.278597 | 41.987212 | 113.477018 | -2.005892 | Type_H |
| 130 | 30 | 50.819268 | 15.402213 | 42.528939 | 35.417055 | 112.192804 | 10.869566 | Type_H |
| 131 | 31 | 46.390260 | 11.079047 | 32.136553 | 35.311213 | 98.774546 | 6.386832 | Type_H |
| 132 | 32 | 44.936675 | 17.443838 | 27.780576 | 27.492837 | 117.980325 | 5.569620 | Type_H |
| 133 | 33 | 38.663257 | 12.986441 | 40.000000 | 25.676816 | 124.914118 | 2.703008 | Type_H |
| 134 | 34 | 59.595540 | 31.998244 | 46.560252 | 27.597296 | 119.330354 | 1.474286 | Type_H |
| 135 | 35 | 31.484218 | 7.826221 | 24.284818 | 23.657997 | 113.833145 | 4.393080 | Type_H |
| 136 | 36 | 32.090987 | 6.989378 | 35.998198 | 25.101609 | 132.264735 | 6.413428 | Type_H |
| 137 | 37 | 35.703458 | 19.443253 | 20.700000 | 16.260205 | 137.540613 | -0.263490 | Type_H |
| 138 | 38 | 55.843286 | 28.847448 | 47.690543 | 26.995838 | 123.311845 | 2.812427 | Type_H |
| 139 | 39 | 52.419385 | 19.011561 | 35.872660 | 33.407825 | 116.559771 | 1.694705 | Type_H |
| 140 | 40 | 35.492446 | 11.701672 | 15.590363 | 23.790774 | 106.938852 | -3.460358 | Type_H |
| 141 | 41 | 46.442078 | 8.395036 | 29.037230 | 38.047043 | 115.481405 | 2.045476 | Type_H |
| 142 | 42 | 53.854798 | 19.230643 | 32.779060 | 34.624155 | 121.670915 | 5.329843 | Type_H |
| 143 | 43 | 66.285394 | 26.327845 | 47.500000 | 39.957549 | 121.219684 | -0.799624 | Type_H |
| 144 | 44 | 56.030218 | 16.297915 | 62.275275 | 39.732303 | 114.023117 | -2.325684 | Type_H |
| 145 | 45 | 50.912440 | 23.015169 | 47.000000 | 27.897271 | 117.422259 | -2.526702 | Type_H |
| 146 | 46 | 48.332638 | 22.227784 | 36.181993 | 26.104854 | 117.384625 | 6.481709 | Type_H |
| 147 | 47 | 41.352504 | 16.577364 | 30.706191 | 24.775141 | 113.266675 | -4.497958 | Type_H |
| 148 | 48 | 40.557357 | 17.977784 | 34.000000 | 22.579573 | 121.046246 | -1.537383 | Type_H |
| 149 | 49 | 41.767732 | 17.899402 | 20.030886 | 23.868330 | 118.363389 | 2.062963 | Type_H |
| 150 | 50 | 55.285852 | 20.440118 | 34.000000 | 34.845733 | 115.877017 | 3.558372 | Type_H |
| 151 | 51 | 74.433593 | 41.557331 | 27.700000 | 32.876262 | 107.949304 | 5.000089 | Type_H |
| 152 | 52 | 50.209670 | 29.760122 | 36.104007 | 20.449548 | 128.292515 | 5.740614 | Type_H |
| 153 | 53 | 30.149936 | 11.917445 | 34.000000 | 18.232491 | 112.684141 | 11.463223 | Type_H |
| 154 | 54 | 41.171680 | 17.321206 | 33.469403 | 23.850474 | 116.377889 | -9.569250 | Type_H |
| 155 | 55 | 47.657730 | 13.277385 | 36.679985 | 34.380345 | 98.249781 | 6.273012 | Type_H |
| 156 | 56 | 43.349606 | 7.467469 | 28.065483 | 35.882137 | 112.776187 | 5.753277 | Type_H |
| 157 | 57 | 46.855781 | 15.351514 | 38.000000 | 31.504267 | 116.250917 | 1.662706 | Type_H |
| 158 | 58 | 43.203185 | 19.663146 | 35.000000 | 23.540039 | 124.846109 | -2.919076 | Type_H |
| 159 | 59 | 48.109236 | 14.930725 | 35.564683 | 33.178512 | 124.056452 | 7.947905 | Type_H |
| 160 | 0 | 74.377678 | 32.053104 | 78.772013 | 42.324573 | 143.560690 | 56.125906 | Type_S |
| 161 | 1 | 89.680567 | 32.704435 | 83.130732 | 56.976132 | 129.955476 | 92.027277 | Type_S |
| 162 | 2 | 44.529051 | 9.433234 | 52.000000 | 35.095817 | 134.711772 | 29.106575 | Type_S |
| 163 | 3 | 77.690577 | 21.380645 | 64.429442 | 56.309932 | 114.818751 | 26.931841 | Type_S |
| 164 | 4 | 76.147212 | 21.936186 | 82.961502 | 54.211027 | 123.932010 | 10.431972 | Type_S |
| 165 | 5 | 83.933009 | 41.286305 | 62.000000 | 42.646703 | 115.012334 | 26.588100 | Type_S |
| 166 | 6 | 78.491730 | 22.181798 | 60.000000 | 56.309932 | 118.530327 | 27.383213 | Type_S |
| 167 | 7 | 75.649731 | 19.339799 | 64.148685 | 56.309932 | 95.903629 | 69.551303 | Type_S |
| 168 | 8 | 72.076278 | 18.946176 | 51.000000 | 53.130102 | 114.213013 | 1.010041 | Type_S |
| 169 | 9 | 58.599529 | -0.261499 | 51.500000 | 58.861028 | 102.042812 | 28.059697 | Type_S |
| 170 | 10 | 72.560702 | 17.385191 | 52.000000 | 55.175511 | 119.193724 | 32.108537 | Type_S |
| 171 | 11 | 86.900794 | 32.928168 | 47.794347 | 53.972627 | 135.075364 | 101.719092 | Type_S |
| 172 | 12 | 84.974132 | 33.021175 | 60.859873 | 51.952957 | 125.659534 | 74.333409 | Type_S |
| 173 | 13 | 55.512212 | 20.095157 | 44.000000 | 35.417055 | 122.648753 | 34.552946 | Type_S |
| 174 | 14 | 72.222334 | 23.077711 | 91.000000 | 49.144624 | 137.736655 | 56.804093 | Type_S |
| 175 | 15 | 70.221452 | 39.822724 | 68.118403 | 30.398728 | 148.525562 | 145.378143 | Type_S |
| 176 | 16 | 86.753609 | 36.043016 | 69.221045 | 50.710593 | 139.414504 | 110.860782 | Type_S |
| 177 | 17 | 58.782548 | 7.667044 | 53.338941 | 51.115504 | 98.501157 | 51.584125 | Type_S |
| 178 | 18 | 67.412538 | 17.442797 | 60.144640 | 49.969741 | 111.123970 | 33.157646 | Type_S |
| 179 | 19 | 47.744679 | 12.089351 | 39.000000 | 35.655328 | 117.512004 | 21.682401 | Type_S |
| 180 | 20 | 77.106571 | 30.469994 | 69.480628 | 46.636577 | 112.151600 | 70.759083 | Type_S |
| 181 | 21 | 74.005541 | 21.122402 | 57.379502 | 52.883139 | 120.205963 | 74.555166 | Type_S |
| 182 | 22 | 88.623908 | 29.089453 | 47.564262 | 59.534455 | 121.764780 | 51.805899 | Type_S |
| 183 | 23 | 81.104100 | 24.794168 | 77.887020 | 56.309932 | 151.839857 | 65.214616 | Type_S |
| 184 | 24 | 76.326002 | 42.396204 | 57.200000 | 33.929797 | 124.267007 | 50.127457 | Type_S |
| 185 | 25 | 45.443750 | 9.906072 | 45.000000 | 35.537678 | 163.071041 | 20.315315 | Type_S |
| 186 | 26 | 59.785265 | 17.879323 | 59.206461 | 41.905942 | 119.319111 | 22.123869 | Type_S |
| 187 | 27 | 44.914149 | 10.218996 | 44.630914 | 34.695154 | 130.075660 | 37.364540 | Type_S |
| 188 | 28 | 56.605771 | 16.800200 | 42.000000 | 39.805571 | 127.294522 | 24.018575 | Type_S |
| 189 | 29 | 71.186811 | 23.896201 | 43.696665 | 47.290610 | 119.864938 | 27.283985 | Type_S |
| 190 | 30 | 81.656032 | 28.748869 | 58.232821 | 52.907163 | 114.769856 | 30.609148 | Type_S |
| 191 | 31 | 70.952728 | 20.159931 | 62.859109 | 50.792797 | 116.177932 | 32.522331 | Type_S |
| 192 | 32 | 85.352315 | 15.844910 | 71.668660 | 69.507405 | 124.419787 | 76.020603 | Type_S |
| 193 | 33 | 58.101935 | 14.837639 | 79.649838 | 43.264295 | 113.587655 | 50.237878 | Type_S |
| 194 | 34 | 94.174822 | 15.380770 | 67.705721 | 78.794052 | 114.890113 | 53.255220 | Type_S |
| 195 | 35 | 57.522356 | 33.647075 | 50.909858 | 23.875281 | 140.981712 | 148.753711 | Type_S |
| 196 | 36 | 96.657315 | 19.461581 | 90.211498 | 77.195734 | 120.673041 | 64.080998 | Type_S |
| 197 | 37 | 74.720746 | 19.756942 | 82.735360 | 54.963804 | 109.356594 | 33.306067 | Type_S |
| 198 | 38 | 77.655119 | 22.432950 | 93.892779 | 55.222169 | 123.055707 | 61.211187 | Type_S |
| 199 | 39 | 58.521623 | 13.922286 | 41.467855 | 44.599337 | 115.514798 | 30.387984 | Type_S |
| 200 | 40 | 84.585607 | 30.361685 | 65.479486 | 54.223922 | 108.010219 | 25.118478 | Type_S |
| 201 | 41 | 79.938570 | 18.774071 | 63.311835 | 61.164499 | 114.787107 | 38.538741 | Type_S |
| 202 | 42 | 70.399308 | 13.469986 | 61.200000 | 56.929322 | 102.337524 | 25.538429 | Type_S |
| 203 | 43 | 49.782121 | 6.466805 | 53.000000 | 43.315316 | 110.864783 | 25.335647 | Type_S |
| 204 | 44 | 77.409333 | 29.396545 | 63.232302 | 48.012788 | 118.450731 | 93.563737 | Type_S |
| 205 | 45 | 65.007964 | 27.602608 | 50.947519 | 37.405357 | 116.581109 | 7.015978 | Type_S |
| 206 | 46 | 65.013773 | 9.838262 | 57.735837 | 55.175511 | 94.738525 | 49.696955 | Type_S |
| 207 | 47 | 78.425951 | 33.425951 | 76.277439 | 45.000000 | 138.554111 | 77.155172 | Type_S |
| 208 | 48 | 63.172987 | 6.330911 | 63.000000 | 56.842076 | 110.644021 | 42.608076 | Type_S |
| 209 | 49 | 68.613001 | 15.082235 | 63.014696 | 53.530766 | 123.431174 | 39.497987 | Type_S |
| 210 | 50 | 63.900633 | 13.706204 | 62.124334 | 50.194429 | 114.129243 | 41.422828 | Type_S |
| 211 | 51 | 84.998956 | 29.610098 | 83.352194 | 55.388858 | 126.912990 | 71.321175 | Type_S |
| 212 | 52 | 42.021386 | -6.554948 | 67.900000 | 48.576334 | 111.585782 | 27.338671 | Type_S |
| 213 | 53 | 69.756665 | 19.279297 | 48.500000 | 50.477369 | 96.491370 | 51.169640 | Type_S |
| 214 | 54 | 80.988074 | 36.843172 | 86.960602 | 44.144903 | 141.088149 | 85.872152 | Type_S |
| 215 | 55 | 129.834041 | 8.404475 | 48.384057 | 121.429566 | 107.690466 | 418.543082 | Type_S |
| 216 | 56 | 70.484104 | 12.489488 | 62.417142 | 57.994617 | 114.190049 | 56.902448 | Type_S |
| 217 | 57 | 86.041280 | 38.750670 | 47.871405 | 47.290610 | 122.092954 | 61.988277 | Type_S |
| 218 | 58 | 65.536003 | 24.157487 | 45.775170 | 41.378515 | 136.440302 | 16.378086 | Type_S |
| 219 | 59 | 60.753894 | 15.753894 | 43.199158 | 45.000000 | 113.053331 | 31.693548 | Type_S |
| 220 | 60 | 54.741775 | 12.095072 | 41.000000 | 42.646703 | 117.643219 | 40.382327 | Type_S |
| 221 | 61 | 83.879941 | 23.077427 | 87.141512 | 60.802514 | 124.646072 | 80.555605 | Type_S |
| 222 | 62 | 80.074914 | 48.069531 | 52.403439 | 32.005383 | 110.709912 | 67.727316 | Type_S |
| 223 | 63 | 65.665347 | 10.540675 | 56.489135 | 55.124672 | 109.162777 | 53.932020 | Type_S |
| 224 | 64 | 74.717228 | 14.321679 | 32.500000 | 60.395549 | 107.182218 | 37.017080 | Type_S |
| 225 | 65 | 48.060626 | 5.687032 | 57.057161 | 42.373594 | 95.443757 | 32.835877 | Type_S |
| 226 | 66 | 70.676898 | 21.704402 | 59.181161 | 48.972496 | 103.008355 | 27.810148 | Type_S |
| 227 | 67 | 80.433428 | 16.998479 | 66.536018 | 63.434949 | 116.438981 | 57.781250 | Type_S |
| 228 | 68 | 90.513961 | 28.272501 | 69.813942 | 62.241459 | 100.892160 | 58.823648 | Type_S |
| 229 | 69 | 77.236898 | 16.737622 | 49.775534 | 60.499275 | 110.690377 | 39.787154 | Type_S |
| 230 | 70 | 50.066786 | 9.120340 | 32.168463 | 40.946446 | 99.712453 | 26.766697 | Type_S |
| 231 | 71 | 69.781006 | 13.777465 | 58.000000 | 56.003541 | 118.930666 | 17.914560 | Type_S |
| 232 | 72 | 69.626283 | 21.122751 | 52.766595 | 48.503532 | 116.803091 | 54.816867 | Type_S |
| 233 | 73 | 81.754419 | 20.123466 | 70.560440 | 61.630954 | 119.425086 | 55.506889 | Type_S |
| 234 | 74 | 52.204693 | 17.212673 | 78.094969 | 34.992020 | 136.972517 | 54.939134 | Type_S |
| 235 | 75 | 77.121344 | 30.349874 | 77.481083 | 46.771470 | 110.611148 | 82.093607 | Type_S |
| 236 | 76 | 88.024499 | 39.844669 | 81.774473 | 48.179830 | 116.601538 | 56.766083 | Type_S |
| 237 | 77 | 83.396606 | 34.310989 | 78.423293 | 49.085617 | 110.466516 | 49.672096 | Type_S |
| 238 | 78 | 72.054034 | 24.700737 | 79.874016 | 47.353297 | 107.172358 | 56.426159 | Type_S |
| 239 | 79 | 85.095503 | 21.069897 | 91.734792 | 64.025606 | 109.062312 | 38.032831 | Type_S |
| 240 | 80 | 69.563486 | 15.401139 | 74.438497 | 54.162347 | 105.067356 | 29.701211 | Type_S |
| 241 | 81 | 89.504947 | 48.903653 | 72.003423 | 40.601295 | 134.634291 | 118.353370 | Type_S |
| 242 | 82 | 85.290173 | 18.278890 | 100.744220 | 67.011283 | 110.660700 | 58.884948 | Type_S |
| 243 | 83 | 60.626217 | 20.595958 | 64.535262 | 40.030259 | 117.225554 | 104.859247 | Type_S |
| 244 | 84 | 60.044177 | 14.309656 | 58.038865 | 45.734521 | 105.131664 | 30.409133 | Type_S |
| 245 | 85 | 85.643787 | 42.689195 | 78.750664 | 42.954592 | 105.144076 | 42.887426 | Type_S |
| 246 | 86 | 85.581710 | 30.457039 | 78.231379 | 55.124672 | 114.866049 | 68.376122 | Type_S |
| 247 | 87 | 55.080766 | -3.759930 | 56.000000 | 58.840695 | 109.915367 | 31.773583 | Type_S |
| 248 | 88 | 65.755679 | 9.832874 | 50.822895 | 55.922805 | 104.394959 | 39.307212 | Type_S |
| 249 | 89 | 79.249671 | 23.944825 | 40.796698 | 55.304846 | 98.622512 | 36.706395 | Type_S |
| 250 | 90 | 81.112605 | 20.690444 | 60.687006 | 60.422161 | 94.018783 | 40.510982 | Type_S |
| 251 | 91 | 48.030624 | 3.969815 | 58.344519 | 44.060809 | 125.350962 | 35.000078 | Type_S |
| 252 | 92 | 63.404481 | 14.115327 | 48.136806 | 49.289153 | 111.916007 | 31.784495 | Type_S |
| 253 | 93 | 57.286945 | 15.149350 | 64.000000 | 42.137595 | 116.735387 | 30.341203 | Type_S |
| 254 | 94 | 41.187770 | 5.792974 | 42.867392 | 35.394796 | 103.348880 | 27.660277 | Type_S |
| 255 | 95 | 66.804796 | 14.551602 | 72.084912 | 52.253195 | 82.456038 | 41.685474 | Type_S |
| 256 | 96 | 79.476978 | 26.732268 | 70.650982 | 52.744711 | 118.588669 | 61.700598 | Type_S |
| 257 | 97 | 44.216464 | 1.507075 | 46.110339 | 42.709390 | 108.629567 | 42.810481 | Type_S |
| 258 | 98 | 57.035097 | 0.345728 | 49.198003 | 56.689369 | 103.048698 | 52.165145 | Type_S |
| 259 | 99 | 64.274818 | 12.508643 | 68.702377 | 51.766175 | 95.252454 | 39.409826 | Type_S |
| 260 | 100 | 92.026308 | 35.392674 | 77.416963 | 56.633634 | 115.723530 | 58.057542 | Type_S |
| 261 | 101 | 67.263149 | 7.194661 | 51.696887 | 60.068488 | 97.801085 | 42.136943 | Type_S |
| 262 | 102 | 118.144655 | 38.449501 | 50.838520 | 79.695154 | 81.024541 | 74.043767 | Type_S |
| 263 | 103 | 115.923261 | 37.515436 | 76.800000 | 78.407825 | 104.698603 | 81.198927 | Type_S |
| 264 | 104 | 53.941658 | 9.306594 | 43.100498 | 44.635064 | 124.397821 | 25.082127 | Type_S |
| 265 | 105 | 83.703177 | 20.268229 | 77.110598 | 63.434949 | 125.480174 | 69.279571 | Type_S |
| 266 | 106 | 56.991404 | 6.874089 | 57.009005 | 50.117315 | 109.978045 | 36.810111 | Type_S |
| 267 | 107 | 72.343594 | 16.420790 | 59.869012 | 55.922805 | 70.082575 | 12.072644 | Type_S |
| 268 | 108 | 95.382596 | 24.822631 | 95.157633 | 70.559965 | 89.307547 | 57.660841 | Type_S |
| 269 | 109 | 44.253476 | 1.101087 | 38.000000 | 43.152390 | 98.274107 | 23.910635 | Type_S |
| 270 | 110 | 64.809541 | 15.174078 | 58.839994 | 49.635463 | 111.679961 | 21.407198 | Type_S |
| 271 | 111 | 78.401254 | 14.042260 | 79.694263 | 64.358994 | 104.731234 | 12.392853 | Type_S |
| 272 | 112 | 56.668293 | 13.458203 | 43.769710 | 43.210089 | 93.692209 | 21.108121 | Type_S |
| 273 | 113 | 50.825029 | 9.064729 | 56.300000 | 41.760300 | 78.999454 | 23.041524 | Type_S |
| 274 | 114 | 61.411737 | 25.384364 | 39.096869 | 36.027373 | 103.404597 | 21.843407 | Type_S |
| 275 | 115 | 56.563824 | 8.961262 | 52.577846 | 47.602562 | 98.777115 | 50.701873 | Type_S |
| 276 | 116 | 67.027664 | 13.281502 | 66.150403 | 53.746162 | 100.715413 | 33.989136 | Type_S |
| 277 | 117 | 80.817771 | 19.238981 | 61.642451 | 61.578791 | 89.471834 | 44.167602 | Type_S |
| 278 | 118 | 80.654320 | 26.344379 | 60.898118 | 54.309940 | 120.103493 | 52.467552 | Type_S |
| 279 | 119 | 68.721910 | 49.431864 | 68.056012 | 19.290046 | 125.018517 | 54.691289 | Type_S |
| 280 | 120 | 37.903910 | 4.479099 | 24.710274 | 33.424811 | 157.848799 | 33.607027 | Type_S |
| 281 | 121 | 64.624008 | 15.225303 | 67.632167 | 49.398705 | 90.298468 | 31.326411 | Type_S |
| 282 | 122 | 75.437748 | 31.539454 | 89.600000 | 43.898294 | 106.829590 | 54.965789 | Type_S |
| 283 | 123 | 71.001941 | 37.515772 | 84.537093 | 33.486169 | 125.164232 | 67.771190 | Type_S |
| 284 | 124 | 81.056611 | 20.801492 | 91.784495 | 60.255119 | 125.430176 | 38.181782 | Type_S |
| 285 | 125 | 91.468741 | 24.508177 | 84.620272 | 66.960564 | 117.307897 | 52.623047 | Type_S |
| 286 | 126 | 81.082320 | 21.255840 | 78.766756 | 59.826480 | 90.071880 | 49.159426 | Type_S |
| 287 | 127 | 60.419932 | 5.265665 | 59.814236 | 55.154267 | 109.033075 | 30.265785 | Type_S |
| 288 | 128 | 85.680950 | 38.650035 | 82.680977 | 47.030914 | 120.840707 | 61.959034 | Type_S |
| 289 | 129 | 82.406524 | 29.276422 | 77.054565 | 53.130102 | 117.042244 | 62.765348 | Type_S |
| 290 | 130 | 43.718262 | 9.811985 | 52.000000 | 33.906277 | 88.434242 | 40.880923 | Type_S |
| 291 | 131 | 86.472905 | 40.303766 | 61.141012 | 46.169139 | 97.404189 | 55.752221 | Type_S |
| 292 | 132 | 74.469082 | 33.283157 | 66.942101 | 41.185925 | 146.466001 | 124.984406 | Type_S |
| 293 | 133 | 70.250436 | 10.340123 | 76.370070 | 59.910314 | 119.237007 | 32.666502 | Type_S |
| 294 | 134 | 72.643850 | 18.929117 | 68.000000 | 53.714733 | 116.963416 | 25.384247 | Type_S |
| 295 | 135 | 71.241764 | 5.268270 | 85.999584 | 65.973493 | 110.703107 | 38.259864 | Type_S |
| 296 | 136 | 63.772391 | 12.763385 | 65.360524 | 51.009006 | 89.822741 | 55.995454 | Type_S |
| 297 | 137 | 58.828379 | 37.577873 | 125.742385 | 21.250506 | 135.629418 | 117.314683 | Type_S |
| 298 | 138 | 74.854480 | 13.909084 | 62.693259 | 60.945396 | 115.208701 | 33.172255 | Type_S |
| 299 | 139 | 75.298478 | 16.671484 | 61.296204 | 58.626995 | 118.883388 | 31.575823 | Type_S |
| 300 | 140 | 63.364339 | 20.024621 | 67.498705 | 43.339718 | 130.999258 | 37.556706 | Type_S |
| 301 | 141 | 67.513053 | 33.275590 | 96.283062 | 34.237463 | 145.601033 | 88.301486 | Type_S |
| 302 | 142 | 76.314028 | 41.933683 | 93.284863 | 34.380345 | 132.267285 | 101.218783 | Type_S |
| 303 | 143 | 73.635962 | 9.711318 | 63.000000 | 63.924644 | 98.727930 | 26.975787 | Type_S |
| 304 | 144 | 56.535051 | 14.377189 | 44.991547 | 42.157862 | 101.723334 | 25.773174 | Type_S |
| 305 | 145 | 80.111572 | 33.942432 | 85.101608 | 46.169139 | 125.593624 | 100.292107 | Type_S |
| 306 | 146 | 95.480229 | 46.550053 | 59.000000 | 48.930176 | 96.683903 | 77.283072 | Type_S |
| 307 | 147 | 74.094731 | 18.823727 | 76.032156 | 55.271004 | 128.405731 | 73.388216 | Type_S |
| 308 | 148 | 87.679087 | 20.365613 | 93.822416 | 67.313473 | 120.944829 | 76.730629 | Type_S |
| 309 | 149 | 48.259920 | 16.417462 | 36.329137 | 31.842457 | 94.882336 | 28.343799 | Type_S |
f = ['index']
final_set.drop(columns=f, inplace=True)
final_set
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 0 | 38.505273 | 16.964297 | 35.112814 | 21.540976 | 127.632875 | 7.986683 | Normal |
| 1 | 54.920858 | 18.968430 | 51.601455 | 35.952428 | 125.846646 | 2.001642 | Normal |
| 2 | 44.362490 | 8.945435 | 46.902096 | 35.417055 | 129.220682 | 4.994195 | Normal |
| 3 | 48.318931 | 17.452121 | 48.000000 | 30.866809 | 128.980308 | -0.910941 | Normal |
| 4 | 45.701789 | 10.659859 | 42.577846 | 35.041929 | 130.178314 | -3.388910 | Normal |
| 5 | 30.741938 | 13.354966 | 35.903526 | 17.386972 | 142.410107 | -2.005373 | Normal |
| 6 | 50.913101 | 6.677000 | 30.896522 | 44.236102 | 118.151531 | -1.057986 | Normal |
| 7 | 38.126589 | 6.557617 | 50.445075 | 31.568971 | 132.114805 | 6.338199 | Normal |
| 8 | 51.624672 | 15.969344 | 35.000000 | 35.655328 | 129.385308 | 1.009228 | Normal |
| 9 | 64.311867 | 26.328369 | 50.958964 | 37.983498 | 106.177751 | 3.118221 | Normal |
| 10 | 44.489275 | 21.786433 | 31.474154 | 22.702842 | 113.778494 | -0.284129 | Normal |
| 11 | 54.950970 | 5.865353 | 53.000000 | 49.085617 | 126.970328 | -0.631603 | Normal |
| 12 | 56.103774 | 13.106307 | 62.637020 | 42.997467 | 116.228503 | 31.172767 | Normal |
| 13 | 69.398818 | 18.898407 | 75.966361 | 50.500411 | 103.582540 | -0.443661 | Normal |
| 14 | 89.834676 | 22.639217 | 90.563461 | 67.195460 | 100.501192 | 3.040973 | Normal |
| 15 | 59.726140 | 7.724873 | 55.343485 | 52.001268 | 125.174221 | 3.235159 | Normal |
| 16 | 63.959522 | 16.060945 | 63.123736 | 47.898577 | 142.360125 | 6.298971 | Normal |
| 17 | 61.540599 | 19.676957 | 52.892229 | 41.863642 | 118.686268 | 4.815031 | Normal |
| 18 | 38.046551 | 8.301669 | 26.236830 | 29.744881 | 123.803413 | 3.885773 | Normal |
| 19 | 43.436451 | 10.095743 | 36.032224 | 33.340707 | 137.439694 | -3.114451 | Normal |
| 20 | 65.611802 | 23.137919 | 62.582179 | 42.473883 | 124.128001 | -4.083298 | Normal |
| 21 | 53.911054 | 12.939318 | 39.000000 | 40.971736 | 118.193035 | 5.074353 | Normal |
| 22 | 43.117951 | 13.815744 | 40.347388 | 29.302207 | 128.517722 | 0.970926 | Normal |
| 23 | 40.683229 | 9.148437 | 31.021593 | 31.534792 | 139.118472 | -2.511619 | Normal |
| 24 | 37.731992 | 9.386298 | 42.000000 | 28.345694 | 135.740926 | 13.683047 | Normal |
| 25 | 63.929470 | 19.971097 | 40.177050 | 43.958373 | 113.065939 | -11.058179 | Normal |
| 26 | 61.821627 | 13.597105 | 64.000000 | 48.224523 | 121.779803 | 1.296191 | Normal |
| 27 | 62.140805 | 13.960975 | 58.000000 | 48.179830 | 133.281834 | 4.955106 | Normal |
| 28 | 69.004913 | 13.291790 | 55.570143 | 55.713123 | 126.611622 | 10.832011 | Normal |
| 29 | 56.447026 | 19.444499 | 43.577846 | 37.002527 | 139.189690 | -1.859689 | Normal |
| 30 | 41.646916 | 8.835549 | 36.031975 | 32.811367 | 116.555168 | -6.054538 | Normal |
| 31 | 51.529358 | 13.517847 | 35.000000 | 38.011510 | 126.718516 | 13.928331 | Normal |
| 32 | 39.087264 | 5.536602 | 26.932038 | 33.550662 | 131.584420 | -0.759461 | Normal |
| 33 | 34.649922 | 7.514783 | 43.000000 | 27.135140 | 123.987741 | -4.082938 | Normal |
| 34 | 63.026300 | 27.336240 | 51.605017 | 35.690060 | 114.506608 | 7.439870 | Normal |
| 35 | 47.805559 | 10.688698 | 54.000000 | 37.116861 | 125.391138 | -0.402523 | Normal |
| 36 | 46.637864 | 15.853717 | 40.000000 | 30.784147 | 119.377603 | 9.064582 | Normal |
| 37 | 49.828135 | 16.736435 | 28.000000 | 33.091700 | 121.435558 | 1.913307 | Normal |
| 38 | 47.319648 | 8.573680 | 35.560252 | 38.745967 | 120.576972 | 1.630664 | Normal |
| 39 | 50.753290 | 20.235060 | 37.000000 | 30.518231 | 122.343516 | 2.288488 | Normal |
| 40 | 36.157830 | -0.810514 | 33.627314 | 36.968344 | 135.936910 | -2.092507 | Normal |
| 41 | 40.746996 | 1.835524 | 50.000000 | 38.911472 | 139.247150 | 0.668557 | Normal |
| 42 | 42.918041 | -5.845994 | 58.000000 | 48.764035 | 121.606859 | -3.362045 | Normal |
| 43 | 63.792425 | 21.345323 | 66.000000 | 42.447102 | 119.550391 | 12.382604 | Normal |
| 44 | 72.955644 | 19.576971 | 61.007071 | 53.378673 | 111.234047 | 0.813491 | Normal |
| 45 | 67.538182 | 14.655042 | 58.001429 | 52.883139 | 123.632260 | 25.970206 | Normal |
| 46 | 54.752520 | 9.752520 | 48.000000 | 45.000000 | 123.037999 | 8.235294 | Normal |
| 47 | 50.160078 | -2.970024 | 42.000000 | 53.130102 | 131.802491 | -8.290203 | Normal |
| 48 | 40.349296 | 10.194748 | 37.967747 | 30.154548 | 128.009927 | 0.458901 | Normal |
| 49 | 63.619192 | 16.934508 | 49.349262 | 46.684684 | 117.089747 | -0.357812 | Normal |
| 50 | 54.142408 | 11.935110 | 43.000000 | 42.207298 | 122.209083 | 0.153549 | Normal |
| 51 | 74.976021 | 14.921705 | 53.730072 | 60.054317 | 105.645400 | 1.594748 | Normal |
| 52 | 42.517272 | 14.375671 | 25.323565 | 28.141601 | 128.905689 | 0.757020 | Normal |
| 53 | 33.788843 | 3.675110 | 25.500000 | 30.113733 | 128.325356 | -1.776111 | Normal |
| 54 | 54.503685 | 6.819910 | 47.000000 | 47.683775 | 111.791172 | -4.406769 | Normal |
| 55 | 48.170746 | 9.594217 | 39.710920 | 38.576530 | 135.623310 | 5.360051 | Normal |
| 56 | 46.374088 | 10.215902 | 42.700000 | 36.158185 | 121.247657 | -0.542022 | Normal |
| 57 | 52.862214 | 9.410372 | 46.988052 | 43.451842 | 123.091240 | 1.856659 | Normal |
| 58 | 57.145851 | 16.489091 | 42.842148 | 40.656760 | 113.806177 | 5.015186 | Normal |
| 59 | 37.140150 | 16.481240 | 24.000000 | 20.658910 | 125.014361 | 7.366425 | Normal |
| 60 | 51.311771 | 8.875541 | 57.000000 | 42.436230 | 126.472258 | -2.144044 | Normal |
| 61 | 42.515610 | 16.541216 | 42.000000 | 25.974394 | 120.631941 | 7.876731 | Normal |
| 62 | 39.358705 | 7.011262 | 37.000000 | 32.347443 | 117.818760 | 1.904048 | Normal |
| 63 | 35.877571 | 1.112374 | 43.457257 | 34.765197 | 126.923906 | -1.632238 | Normal |
| 64 | 43.191915 | 9.976664 | 28.938149 | 33.215251 | 123.467400 | 1.741018 | Normal |
| 65 | 67.289712 | 16.717514 | 51.000000 | 50.572198 | 137.591778 | 4.960344 | Normal |
| 66 | 51.325464 | 13.631223 | 33.258578 | 37.694240 | 131.306122 | 1.788870 | Normal |
| 67 | 65.756348 | 13.206926 | 44.000000 | 52.549422 | 129.393573 | -1.982120 | Normal |
| 68 | 40.413366 | -1.329412 | 30.982768 | 41.742778 | 119.335655 | -6.173675 | Normal |
| 69 | 48.801909 | 18.017762 | 52.000000 | 30.784147 | 139.150407 | 10.442862 | Normal |
| 70 | 50.086153 | 13.430044 | 34.457541 | 36.656108 | 119.134622 | 3.089484 | Normal |
| 71 | 64.261507 | 14.497866 | 43.902504 | 49.763642 | 115.388268 | 5.951454 | Normal |
| 72 | 53.683380 | 13.447022 | 41.584297 | 40.236358 | 113.913703 | 2.737035 | Normal |
| 73 | 48.995958 | 13.113820 | 51.873520 | 35.882137 | 126.398188 | 0.535472 | Normal |
| 74 | 59.167612 | 14.562749 | 43.199158 | 44.604863 | 121.035642 | 2.830504 | Normal |
| 75 | 67.804694 | 16.550662 | 43.256802 | 51.254033 | 119.685645 | 4.867540 | Normal |
| 76 | 61.734875 | 17.114312 | 46.900000 | 44.620563 | 120.920200 | 3.087726 | Normal |
| 77 | 33.041688 | -0.324678 | 19.071075 | 33.366366 | 120.388611 | 9.354365 | Normal |
| 78 | 74.565015 | 15.724320 | 58.618582 | 58.840695 | 105.417304 | 0.599247 | Normal |
| 79 | 44.430701 | 14.174264 | 32.243495 | 30.256437 | 131.717613 | -3.604255 | Normal |
| 80 | 36.422485 | 13.879424 | 20.242562 | 22.543061 | 126.076861 | 0.179717 | Normal |
| 81 | 51.079833 | 14.209935 | 35.951229 | 36.869898 | 115.803711 | 6.905090 | Normal |
| 82 | 34.756738 | 2.631740 | 29.504381 | 32.124998 | 127.139850 | -0.460894 | Normal |
| 83 | 48.902904 | 5.587589 | 55.500000 | 43.315316 | 137.108289 | 19.854759 | Normal |
| 84 | 46.236399 | 10.062770 | 37.000000 | 36.173629 | 128.063620 | -5.100053 | Normal |
| 85 | 46.426366 | 6.620795 | 48.100000 | 39.805571 | 130.350096 | 2.449382 | Normal |
| 86 | 39.656902 | 16.208839 | 36.674857 | 23.448063 | 131.922009 | -4.968980 | Normal |
| 87 | 45.575482 | 18.759135 | 33.774143 | 26.816347 | 116.797007 | 3.131910 | Normal |
| 88 | 66.507179 | 20.897672 | 31.727471 | 45.609507 | 128.902905 | 1.517203 | Normal |
| 89 | 82.905351 | 29.894119 | 58.250542 | 53.011232 | 110.708958 | 6.079338 | Normal |
| 90 | 50.676677 | 6.461501 | 35.000000 | 44.215175 | 116.587970 | -0.214711 | Normal |
| 91 | 89.014875 | 26.075981 | 69.021259 | 62.938894 | 111.481075 | 6.061508 | Normal |
| 92 | 54.600316 | 21.488974 | 29.360216 | 33.111342 | 118.343321 | -1.471067 | Normal |
| 93 | 34.382299 | 2.062683 | 32.390820 | 32.319617 | 128.300199 | -3.365516 | Normal |
| 94 | 45.075450 | 12.306951 | 44.583177 | 32.768499 | 147.894637 | -8.941709 | Normal |
| 95 | 47.903565 | 13.616688 | 36.000000 | 34.286877 | 117.449062 | -4.245395 | Normal |
| 96 | 53.936748 | 20.721496 | 29.220534 | 33.215251 | 114.365845 | -0.421010 | Normal |
| 97 | 61.446597 | 22.694968 | 46.170347 | 38.751628 | 125.670725 | -2.707880 | Normal |
| 98 | 45.252792 | 8.693157 | 41.583126 | 36.559635 | 118.545842 | 0.214750 | Normal |
| 99 | 33.841641 | 5.073991 | 36.641233 | 28.767649 | 123.945244 | -0.199249 | Normal |
| 100 | 63.027818 | 22.552586 | 39.609117 | 40.475232 | 98.672917 | -0.254400 | Type_H |
| 101 | 39.056951 | 10.060991 | 25.015378 | 28.995960 | 114.405425 | 4.564259 | Type_H |
| 102 | 68.832021 | 22.218482 | 50.092194 | 46.613539 | 105.985135 | -3.530317 | Type_H |
| 103 | 69.297008 | 24.652878 | 44.311238 | 44.644130 | 101.868495 | 11.211523 | Type_H |
| 104 | 49.712859 | 9.652075 | 28.317406 | 40.060784 | 108.168725 | 7.918501 | Type_H |
| 105 | 40.250200 | 13.921907 | 25.124950 | 26.328293 | 130.327871 | 2.230652 | Type_H |
| 106 | 53.432928 | 15.864336 | 37.165934 | 37.568592 | 120.567523 | 5.988551 | Type_H |
| 107 | 45.366754 | 10.755611 | 29.038349 | 34.611142 | 117.270068 | -10.675871 | Type_H |
| 108 | 43.790190 | 13.533753 | 42.690814 | 30.256437 | 125.002893 | 13.289018 | Type_H |
| 109 | 36.686353 | 5.010884 | 41.948751 | 31.675469 | 84.241415 | 0.664437 | Type_H |
| 110 | 49.706610 | 13.040974 | 31.334500 | 36.665635 | 108.648265 | -7.825986 | Type_H |
| 111 | 31.232387 | 17.715819 | 15.500000 | 13.516568 | 120.055399 | 0.499751 | Type_H |
| 112 | 48.915551 | 19.964556 | 40.263794 | 28.950995 | 119.321358 | 8.028895 | Type_H |
| 113 | 53.572170 | 20.460828 | 33.100000 | 33.111342 | 110.966698 | 7.044803 | Type_H |
| 114 | 57.300227 | 24.188885 | 47.000000 | 33.111342 | 116.806587 | 5.766947 | Type_H |
| 115 | 44.318907 | 12.537992 | 36.098763 | 31.780915 | 124.115836 | 5.415825 | Type_H |
| 116 | 63.834982 | 20.362507 | 54.552434 | 43.472475 | 112.309492 | -0.622527 | Type_H |
| 117 | 31.276012 | 3.144669 | 32.562996 | 28.131342 | 129.011418 | 3.623020 | Type_H |
| 118 | 38.697912 | 13.444749 | 31.000000 | 25.253163 | 123.159251 | 1.429186 | Type_H |
| 119 | 41.729963 | 12.254074 | 30.122586 | 29.475889 | 116.585706 | -1.244402 | Type_H |
| 120 | 43.922840 | 14.177959 | 37.832547 | 29.744881 | 134.461016 | 6.451648 | Type_H |
| 121 | 54.919443 | 21.062332 | 42.200000 | 33.857110 | 125.212716 | 2.432561 | Type_H |
| 122 | 63.073611 | 24.413803 | 54.000000 | 38.659808 | 106.424329 | 15.779697 | Type_H |
| 123 | 45.540790 | 13.069598 | 30.298321 | 32.471192 | 117.980830 | -4.987130 | Type_H |
| 124 | 36.125683 | 22.758753 | 29.000000 | 13.366931 | 115.577116 | -3.237562 | Type_H |
| 125 | 54.124920 | 26.650489 | 35.329747 | 27.474432 | 121.447011 | 1.571205 | Type_H |
| 126 | 26.147921 | 10.759454 | 14.000000 | 15.388468 | 125.203296 | -10.093108 | Type_H |
| 127 | 43.580964 | 16.508884 | 47.000000 | 27.072080 | 109.271634 | 8.992816 | Type_H |
| 128 | 44.551012 | 21.931147 | 26.785916 | 22.619865 | 111.072920 | 2.652321 | Type_H |
| 129 | 66.879211 | 24.891999 | 49.278597 | 41.987212 | 113.477018 | -2.005892 | Type_H |
| 130 | 50.819268 | 15.402213 | 42.528939 | 35.417055 | 112.192804 | 10.869566 | Type_H |
| 131 | 46.390260 | 11.079047 | 32.136553 | 35.311213 | 98.774546 | 6.386832 | Type_H |
| 132 | 44.936675 | 17.443838 | 27.780576 | 27.492837 | 117.980325 | 5.569620 | Type_H |
| 133 | 38.663257 | 12.986441 | 40.000000 | 25.676816 | 124.914118 | 2.703008 | Type_H |
| 134 | 59.595540 | 31.998244 | 46.560252 | 27.597296 | 119.330354 | 1.474286 | Type_H |
| 135 | 31.484218 | 7.826221 | 24.284818 | 23.657997 | 113.833145 | 4.393080 | Type_H |
| 136 | 32.090987 | 6.989378 | 35.998198 | 25.101609 | 132.264735 | 6.413428 | Type_H |
| 137 | 35.703458 | 19.443253 | 20.700000 | 16.260205 | 137.540613 | -0.263490 | Type_H |
| 138 | 55.843286 | 28.847448 | 47.690543 | 26.995838 | 123.311845 | 2.812427 | Type_H |
| 139 | 52.419385 | 19.011561 | 35.872660 | 33.407825 | 116.559771 | 1.694705 | Type_H |
| 140 | 35.492446 | 11.701672 | 15.590363 | 23.790774 | 106.938852 | -3.460358 | Type_H |
| 141 | 46.442078 | 8.395036 | 29.037230 | 38.047043 | 115.481405 | 2.045476 | Type_H |
| 142 | 53.854798 | 19.230643 | 32.779060 | 34.624155 | 121.670915 | 5.329843 | Type_H |
| 143 | 66.285394 | 26.327845 | 47.500000 | 39.957549 | 121.219684 | -0.799624 | Type_H |
| 144 | 56.030218 | 16.297915 | 62.275275 | 39.732303 | 114.023117 | -2.325684 | Type_H |
| 145 | 50.912440 | 23.015169 | 47.000000 | 27.897271 | 117.422259 | -2.526702 | Type_H |
| 146 | 48.332638 | 22.227784 | 36.181993 | 26.104854 | 117.384625 | 6.481709 | Type_H |
| 147 | 41.352504 | 16.577364 | 30.706191 | 24.775141 | 113.266675 | -4.497958 | Type_H |
| 148 | 40.557357 | 17.977784 | 34.000000 | 22.579573 | 121.046246 | -1.537383 | Type_H |
| 149 | 41.767732 | 17.899402 | 20.030886 | 23.868330 | 118.363389 | 2.062963 | Type_H |
| 150 | 55.285852 | 20.440118 | 34.000000 | 34.845733 | 115.877017 | 3.558372 | Type_H |
| 151 | 74.433593 | 41.557331 | 27.700000 | 32.876262 | 107.949304 | 5.000089 | Type_H |
| 152 | 50.209670 | 29.760122 | 36.104007 | 20.449548 | 128.292515 | 5.740614 | Type_H |
| 153 | 30.149936 | 11.917445 | 34.000000 | 18.232491 | 112.684141 | 11.463223 | Type_H |
| 154 | 41.171680 | 17.321206 | 33.469403 | 23.850474 | 116.377889 | -9.569250 | Type_H |
| 155 | 47.657730 | 13.277385 | 36.679985 | 34.380345 | 98.249781 | 6.273012 | Type_H |
| 156 | 43.349606 | 7.467469 | 28.065483 | 35.882137 | 112.776187 | 5.753277 | Type_H |
| 157 | 46.855781 | 15.351514 | 38.000000 | 31.504267 | 116.250917 | 1.662706 | Type_H |
| 158 | 43.203185 | 19.663146 | 35.000000 | 23.540039 | 124.846109 | -2.919076 | Type_H |
| 159 | 48.109236 | 14.930725 | 35.564683 | 33.178512 | 124.056452 | 7.947905 | Type_H |
| 160 | 74.377678 | 32.053104 | 78.772013 | 42.324573 | 143.560690 | 56.125906 | Type_S |
| 161 | 89.680567 | 32.704435 | 83.130732 | 56.976132 | 129.955476 | 92.027277 | Type_S |
| 162 | 44.529051 | 9.433234 | 52.000000 | 35.095817 | 134.711772 | 29.106575 | Type_S |
| 163 | 77.690577 | 21.380645 | 64.429442 | 56.309932 | 114.818751 | 26.931841 | Type_S |
| 164 | 76.147212 | 21.936186 | 82.961502 | 54.211027 | 123.932010 | 10.431972 | Type_S |
| 165 | 83.933009 | 41.286305 | 62.000000 | 42.646703 | 115.012334 | 26.588100 | Type_S |
| 166 | 78.491730 | 22.181798 | 60.000000 | 56.309932 | 118.530327 | 27.383213 | Type_S |
| 167 | 75.649731 | 19.339799 | 64.148685 | 56.309932 | 95.903629 | 69.551303 | Type_S |
| 168 | 72.076278 | 18.946176 | 51.000000 | 53.130102 | 114.213013 | 1.010041 | Type_S |
| 169 | 58.599529 | -0.261499 | 51.500000 | 58.861028 | 102.042812 | 28.059697 | Type_S |
| 170 | 72.560702 | 17.385191 | 52.000000 | 55.175511 | 119.193724 | 32.108537 | Type_S |
| 171 | 86.900794 | 32.928168 | 47.794347 | 53.972627 | 135.075364 | 101.719092 | Type_S |
| 172 | 84.974132 | 33.021175 | 60.859873 | 51.952957 | 125.659534 | 74.333409 | Type_S |
| 173 | 55.512212 | 20.095157 | 44.000000 | 35.417055 | 122.648753 | 34.552946 | Type_S |
| 174 | 72.222334 | 23.077711 | 91.000000 | 49.144624 | 137.736655 | 56.804093 | Type_S |
| 175 | 70.221452 | 39.822724 | 68.118403 | 30.398728 | 148.525562 | 145.378143 | Type_S |
| 176 | 86.753609 | 36.043016 | 69.221045 | 50.710593 | 139.414504 | 110.860782 | Type_S |
| 177 | 58.782548 | 7.667044 | 53.338941 | 51.115504 | 98.501157 | 51.584125 | Type_S |
| 178 | 67.412538 | 17.442797 | 60.144640 | 49.969741 | 111.123970 | 33.157646 | Type_S |
| 179 | 47.744679 | 12.089351 | 39.000000 | 35.655328 | 117.512004 | 21.682401 | Type_S |
| 180 | 77.106571 | 30.469994 | 69.480628 | 46.636577 | 112.151600 | 70.759083 | Type_S |
| 181 | 74.005541 | 21.122402 | 57.379502 | 52.883139 | 120.205963 | 74.555166 | Type_S |
| 182 | 88.623908 | 29.089453 | 47.564262 | 59.534455 | 121.764780 | 51.805899 | Type_S |
| 183 | 81.104100 | 24.794168 | 77.887020 | 56.309932 | 151.839857 | 65.214616 | Type_S |
| 184 | 76.326002 | 42.396204 | 57.200000 | 33.929797 | 124.267007 | 50.127457 | Type_S |
| 185 | 45.443750 | 9.906072 | 45.000000 | 35.537678 | 163.071041 | 20.315315 | Type_S |
| 186 | 59.785265 | 17.879323 | 59.206461 | 41.905942 | 119.319111 | 22.123869 | Type_S |
| 187 | 44.914149 | 10.218996 | 44.630914 | 34.695154 | 130.075660 | 37.364540 | Type_S |
| 188 | 56.605771 | 16.800200 | 42.000000 | 39.805571 | 127.294522 | 24.018575 | Type_S |
| 189 | 71.186811 | 23.896201 | 43.696665 | 47.290610 | 119.864938 | 27.283985 | Type_S |
| 190 | 81.656032 | 28.748869 | 58.232821 | 52.907163 | 114.769856 | 30.609148 | Type_S |
| 191 | 70.952728 | 20.159931 | 62.859109 | 50.792797 | 116.177932 | 32.522331 | Type_S |
| 192 | 85.352315 | 15.844910 | 71.668660 | 69.507405 | 124.419787 | 76.020603 | Type_S |
| 193 | 58.101935 | 14.837639 | 79.649838 | 43.264295 | 113.587655 | 50.237878 | Type_S |
| 194 | 94.174822 | 15.380770 | 67.705721 | 78.794052 | 114.890113 | 53.255220 | Type_S |
| 195 | 57.522356 | 33.647075 | 50.909858 | 23.875281 | 140.981712 | 148.753711 | Type_S |
| 196 | 96.657315 | 19.461581 | 90.211498 | 77.195734 | 120.673041 | 64.080998 | Type_S |
| 197 | 74.720746 | 19.756942 | 82.735360 | 54.963804 | 109.356594 | 33.306067 | Type_S |
| 198 | 77.655119 | 22.432950 | 93.892779 | 55.222169 | 123.055707 | 61.211187 | Type_S |
| 199 | 58.521623 | 13.922286 | 41.467855 | 44.599337 | 115.514798 | 30.387984 | Type_S |
| 200 | 84.585607 | 30.361685 | 65.479486 | 54.223922 | 108.010219 | 25.118478 | Type_S |
| 201 | 79.938570 | 18.774071 | 63.311835 | 61.164499 | 114.787107 | 38.538741 | Type_S |
| 202 | 70.399308 | 13.469986 | 61.200000 | 56.929322 | 102.337524 | 25.538429 | Type_S |
| 203 | 49.782121 | 6.466805 | 53.000000 | 43.315316 | 110.864783 | 25.335647 | Type_S |
| 204 | 77.409333 | 29.396545 | 63.232302 | 48.012788 | 118.450731 | 93.563737 | Type_S |
| 205 | 65.007964 | 27.602608 | 50.947519 | 37.405357 | 116.581109 | 7.015978 | Type_S |
| 206 | 65.013773 | 9.838262 | 57.735837 | 55.175511 | 94.738525 | 49.696955 | Type_S |
| 207 | 78.425951 | 33.425951 | 76.277439 | 45.000000 | 138.554111 | 77.155172 | Type_S |
| 208 | 63.172987 | 6.330911 | 63.000000 | 56.842076 | 110.644021 | 42.608076 | Type_S |
| 209 | 68.613001 | 15.082235 | 63.014696 | 53.530766 | 123.431174 | 39.497987 | Type_S |
| 210 | 63.900633 | 13.706204 | 62.124334 | 50.194429 | 114.129243 | 41.422828 | Type_S |
| 211 | 84.998956 | 29.610098 | 83.352194 | 55.388858 | 126.912990 | 71.321175 | Type_S |
| 212 | 42.021386 | -6.554948 | 67.900000 | 48.576334 | 111.585782 | 27.338671 | Type_S |
| 213 | 69.756665 | 19.279297 | 48.500000 | 50.477369 | 96.491370 | 51.169640 | Type_S |
| 214 | 80.988074 | 36.843172 | 86.960602 | 44.144903 | 141.088149 | 85.872152 | Type_S |
| 215 | 129.834041 | 8.404475 | 48.384057 | 121.429566 | 107.690466 | 418.543082 | Type_S |
| 216 | 70.484104 | 12.489488 | 62.417142 | 57.994617 | 114.190049 | 56.902448 | Type_S |
| 217 | 86.041280 | 38.750670 | 47.871405 | 47.290610 | 122.092954 | 61.988277 | Type_S |
| 218 | 65.536003 | 24.157487 | 45.775170 | 41.378515 | 136.440302 | 16.378086 | Type_S |
| 219 | 60.753894 | 15.753894 | 43.199158 | 45.000000 | 113.053331 | 31.693548 | Type_S |
| 220 | 54.741775 | 12.095072 | 41.000000 | 42.646703 | 117.643219 | 40.382327 | Type_S |
| 221 | 83.879941 | 23.077427 | 87.141512 | 60.802514 | 124.646072 | 80.555605 | Type_S |
| 222 | 80.074914 | 48.069531 | 52.403439 | 32.005383 | 110.709912 | 67.727316 | Type_S |
| 223 | 65.665347 | 10.540675 | 56.489135 | 55.124672 | 109.162777 | 53.932020 | Type_S |
| 224 | 74.717228 | 14.321679 | 32.500000 | 60.395549 | 107.182218 | 37.017080 | Type_S |
| 225 | 48.060626 | 5.687032 | 57.057161 | 42.373594 | 95.443757 | 32.835877 | Type_S |
| 226 | 70.676898 | 21.704402 | 59.181161 | 48.972496 | 103.008355 | 27.810148 | Type_S |
| 227 | 80.433428 | 16.998479 | 66.536018 | 63.434949 | 116.438981 | 57.781250 | Type_S |
| 228 | 90.513961 | 28.272501 | 69.813942 | 62.241459 | 100.892160 | 58.823648 | Type_S |
| 229 | 77.236898 | 16.737622 | 49.775534 | 60.499275 | 110.690377 | 39.787154 | Type_S |
| 230 | 50.066786 | 9.120340 | 32.168463 | 40.946446 | 99.712453 | 26.766697 | Type_S |
| 231 | 69.781006 | 13.777465 | 58.000000 | 56.003541 | 118.930666 | 17.914560 | Type_S |
| 232 | 69.626283 | 21.122751 | 52.766595 | 48.503532 | 116.803091 | 54.816867 | Type_S |
| 233 | 81.754419 | 20.123466 | 70.560440 | 61.630954 | 119.425086 | 55.506889 | Type_S |
| 234 | 52.204693 | 17.212673 | 78.094969 | 34.992020 | 136.972517 | 54.939134 | Type_S |
| 235 | 77.121344 | 30.349874 | 77.481083 | 46.771470 | 110.611148 | 82.093607 | Type_S |
| 236 | 88.024499 | 39.844669 | 81.774473 | 48.179830 | 116.601538 | 56.766083 | Type_S |
| 237 | 83.396606 | 34.310989 | 78.423293 | 49.085617 | 110.466516 | 49.672096 | Type_S |
| 238 | 72.054034 | 24.700737 | 79.874016 | 47.353297 | 107.172358 | 56.426159 | Type_S |
| 239 | 85.095503 | 21.069897 | 91.734792 | 64.025606 | 109.062312 | 38.032831 | Type_S |
| 240 | 69.563486 | 15.401139 | 74.438497 | 54.162347 | 105.067356 | 29.701211 | Type_S |
| 241 | 89.504947 | 48.903653 | 72.003423 | 40.601295 | 134.634291 | 118.353370 | Type_S |
| 242 | 85.290173 | 18.278890 | 100.744220 | 67.011283 | 110.660700 | 58.884948 | Type_S |
| 243 | 60.626217 | 20.595958 | 64.535262 | 40.030259 | 117.225554 | 104.859247 | Type_S |
| 244 | 60.044177 | 14.309656 | 58.038865 | 45.734521 | 105.131664 | 30.409133 | Type_S |
| 245 | 85.643787 | 42.689195 | 78.750664 | 42.954592 | 105.144076 | 42.887426 | Type_S |
| 246 | 85.581710 | 30.457039 | 78.231379 | 55.124672 | 114.866049 | 68.376122 | Type_S |
| 247 | 55.080766 | -3.759930 | 56.000000 | 58.840695 | 109.915367 | 31.773583 | Type_S |
| 248 | 65.755679 | 9.832874 | 50.822895 | 55.922805 | 104.394959 | 39.307212 | Type_S |
| 249 | 79.249671 | 23.944825 | 40.796698 | 55.304846 | 98.622512 | 36.706395 | Type_S |
| 250 | 81.112605 | 20.690444 | 60.687006 | 60.422161 | 94.018783 | 40.510982 | Type_S |
| 251 | 48.030624 | 3.969815 | 58.344519 | 44.060809 | 125.350962 | 35.000078 | Type_S |
| 252 | 63.404481 | 14.115327 | 48.136806 | 49.289153 | 111.916007 | 31.784495 | Type_S |
| 253 | 57.286945 | 15.149350 | 64.000000 | 42.137595 | 116.735387 | 30.341203 | Type_S |
| 254 | 41.187770 | 5.792974 | 42.867392 | 35.394796 | 103.348880 | 27.660277 | Type_S |
| 255 | 66.804796 | 14.551602 | 72.084912 | 52.253195 | 82.456038 | 41.685474 | Type_S |
| 256 | 79.476978 | 26.732268 | 70.650982 | 52.744711 | 118.588669 | 61.700598 | Type_S |
| 257 | 44.216464 | 1.507075 | 46.110339 | 42.709390 | 108.629567 | 42.810481 | Type_S |
| 258 | 57.035097 | 0.345728 | 49.198003 | 56.689369 | 103.048698 | 52.165145 | Type_S |
| 259 | 64.274818 | 12.508643 | 68.702377 | 51.766175 | 95.252454 | 39.409826 | Type_S |
| 260 | 92.026308 | 35.392674 | 77.416963 | 56.633634 | 115.723530 | 58.057542 | Type_S |
| 261 | 67.263149 | 7.194661 | 51.696887 | 60.068488 | 97.801085 | 42.136943 | Type_S |
| 262 | 118.144655 | 38.449501 | 50.838520 | 79.695154 | 81.024541 | 74.043767 | Type_S |
| 263 | 115.923261 | 37.515436 | 76.800000 | 78.407825 | 104.698603 | 81.198927 | Type_S |
| 264 | 53.941658 | 9.306594 | 43.100498 | 44.635064 | 124.397821 | 25.082127 | Type_S |
| 265 | 83.703177 | 20.268229 | 77.110598 | 63.434949 | 125.480174 | 69.279571 | Type_S |
| 266 | 56.991404 | 6.874089 | 57.009005 | 50.117315 | 109.978045 | 36.810111 | Type_S |
| 267 | 72.343594 | 16.420790 | 59.869012 | 55.922805 | 70.082575 | 12.072644 | Type_S |
| 268 | 95.382596 | 24.822631 | 95.157633 | 70.559965 | 89.307547 | 57.660841 | Type_S |
| 269 | 44.253476 | 1.101087 | 38.000000 | 43.152390 | 98.274107 | 23.910635 | Type_S |
| 270 | 64.809541 | 15.174078 | 58.839994 | 49.635463 | 111.679961 | 21.407198 | Type_S |
| 271 | 78.401254 | 14.042260 | 79.694263 | 64.358994 | 104.731234 | 12.392853 | Type_S |
| 272 | 56.668293 | 13.458203 | 43.769710 | 43.210089 | 93.692209 | 21.108121 | Type_S |
| 273 | 50.825029 | 9.064729 | 56.300000 | 41.760300 | 78.999454 | 23.041524 | Type_S |
| 274 | 61.411737 | 25.384364 | 39.096869 | 36.027373 | 103.404597 | 21.843407 | Type_S |
| 275 | 56.563824 | 8.961262 | 52.577846 | 47.602562 | 98.777115 | 50.701873 | Type_S |
| 276 | 67.027664 | 13.281502 | 66.150403 | 53.746162 | 100.715413 | 33.989136 | Type_S |
| 277 | 80.817771 | 19.238981 | 61.642451 | 61.578791 | 89.471834 | 44.167602 | Type_S |
| 278 | 80.654320 | 26.344379 | 60.898118 | 54.309940 | 120.103493 | 52.467552 | Type_S |
| 279 | 68.721910 | 49.431864 | 68.056012 | 19.290046 | 125.018517 | 54.691289 | Type_S |
| 280 | 37.903910 | 4.479099 | 24.710274 | 33.424811 | 157.848799 | 33.607027 | Type_S |
| 281 | 64.624008 | 15.225303 | 67.632167 | 49.398705 | 90.298468 | 31.326411 | Type_S |
| 282 | 75.437748 | 31.539454 | 89.600000 | 43.898294 | 106.829590 | 54.965789 | Type_S |
| 283 | 71.001941 | 37.515772 | 84.537093 | 33.486169 | 125.164232 | 67.771190 | Type_S |
| 284 | 81.056611 | 20.801492 | 91.784495 | 60.255119 | 125.430176 | 38.181782 | Type_S |
| 285 | 91.468741 | 24.508177 | 84.620272 | 66.960564 | 117.307897 | 52.623047 | Type_S |
| 286 | 81.082320 | 21.255840 | 78.766756 | 59.826480 | 90.071880 | 49.159426 | Type_S |
| 287 | 60.419932 | 5.265665 | 59.814236 | 55.154267 | 109.033075 | 30.265785 | Type_S |
| 288 | 85.680950 | 38.650035 | 82.680977 | 47.030914 | 120.840707 | 61.959034 | Type_S |
| 289 | 82.406524 | 29.276422 | 77.054565 | 53.130102 | 117.042244 | 62.765348 | Type_S |
| 290 | 43.718262 | 9.811985 | 52.000000 | 33.906277 | 88.434242 | 40.880923 | Type_S |
| 291 | 86.472905 | 40.303766 | 61.141012 | 46.169139 | 97.404189 | 55.752221 | Type_S |
| 292 | 74.469082 | 33.283157 | 66.942101 | 41.185925 | 146.466001 | 124.984406 | Type_S |
| 293 | 70.250436 | 10.340123 | 76.370070 | 59.910314 | 119.237007 | 32.666502 | Type_S |
| 294 | 72.643850 | 18.929117 | 68.000000 | 53.714733 | 116.963416 | 25.384247 | Type_S |
| 295 | 71.241764 | 5.268270 | 85.999584 | 65.973493 | 110.703107 | 38.259864 | Type_S |
| 296 | 63.772391 | 12.763385 | 65.360524 | 51.009006 | 89.822741 | 55.995454 | Type_S |
| 297 | 58.828379 | 37.577873 | 125.742385 | 21.250506 | 135.629418 | 117.314683 | Type_S |
| 298 | 74.854480 | 13.909084 | 62.693259 | 60.945396 | 115.208701 | 33.172255 | Type_S |
| 299 | 75.298478 | 16.671484 | 61.296204 | 58.626995 | 118.883388 | 31.575823 | Type_S |
| 300 | 63.364339 | 20.024621 | 67.498705 | 43.339718 | 130.999258 | 37.556706 | Type_S |
| 301 | 67.513053 | 33.275590 | 96.283062 | 34.237463 | 145.601033 | 88.301486 | Type_S |
| 302 | 76.314028 | 41.933683 | 93.284863 | 34.380345 | 132.267285 | 101.218783 | Type_S |
| 303 | 73.635962 | 9.711318 | 63.000000 | 63.924644 | 98.727930 | 26.975787 | Type_S |
| 304 | 56.535051 | 14.377189 | 44.991547 | 42.157862 | 101.723334 | 25.773174 | Type_S |
| 305 | 80.111572 | 33.942432 | 85.101608 | 46.169139 | 125.593624 | 100.292107 | Type_S |
| 306 | 95.480229 | 46.550053 | 59.000000 | 48.930176 | 96.683903 | 77.283072 | Type_S |
| 307 | 74.094731 | 18.823727 | 76.032156 | 55.271004 | 128.405731 | 73.388216 | Type_S |
| 308 | 87.679087 | 20.365613 | 93.822416 | 67.313473 | 120.944829 | 76.730629 | Type_S |
| 309 | 48.259920 | 16.417462 | 36.329137 | 31.842457 | 94.882336 | 28.343799 | Type_S |
# checking the data in random order.
final_set.sample(10)
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 244 | 60.044177 | 14.309656 | 58.038865 | 45.734521 | 105.131664 | 30.409133 | Type_S |
| 194 | 94.174822 | 15.380770 | 67.705721 | 78.794052 | 114.890113 | 53.255220 | Type_S |
| 268 | 95.382596 | 24.822631 | 95.157633 | 70.559965 | 89.307547 | 57.660841 | Type_S |
| 152 | 50.209670 | 29.760122 | 36.104007 | 20.449548 | 128.292515 | 5.740614 | Type_H |
| 29 | 56.447026 | 19.444499 | 43.577846 | 37.002527 | 139.189690 | -1.859689 | Normal |
| 48 | 40.349296 | 10.194748 | 37.967747 | 30.154548 | 128.009927 | 0.458901 | Normal |
| 306 | 95.480229 | 46.550053 | 59.000000 | 48.930176 | 96.683903 | 77.283072 | Type_S |
| 137 | 35.703458 | 19.443253 | 20.700000 | 16.260205 | 137.540613 | -0.263490 | Type_H |
| 262 | 118.144655 | 38.449501 | 50.838520 | 79.695154 | 81.024541 | 74.043767 | Type_S |
| 169 | 58.599529 | -0.261499 | 51.500000 | 58.861028 | 102.042812 | 28.059697 | Type_S |
final_set.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| P_incidence | 310.0 | 60.496653 | 17.236520 | 26.147921 | 46.430294 | 58.691038 | 72.877696 | 129.834041 |
| P_tilt | 310.0 | 17.542822 | 10.008330 | -6.554948 | 10.667069 | 16.357689 | 22.120395 | 49.431864 |
| L_angle | 310.0 | 51.930930 | 18.554064 | 14.000000 | 37.000000 | 49.562398 | 63.000000 | 125.742385 |
| S_slope | 310.0 | 42.953831 | 13.423102 | 13.366931 | 33.347122 | 42.404912 | 52.695888 | 121.429566 |
| P_radius | 310.0 | 117.920655 | 13.317377 | 70.082575 | 110.709196 | 118.268178 | 125.467674 | 163.071041 |
| S_Degree | 310.0 | 26.296694 | 37.559027 | -11.058179 | 1.603727 | 11.767934 | 41.287352 | 418.543082 |
# 1. We'll draw the pairplot to check and compare each variable and try and find relations among them.
sns.pairplot(final_set)
<seaborn.axisgrid.PairGrid at 0x7faaedb8cf40>
# analysing categorical variable.
import plotly.express as px
fig = px.histogram(final_set.Class)
fig.show()
# analysing each variable one by one:
P_incidence_mean = final_set.P_incidence.mean()
fig, ax_hist = plt.subplots(figsize = (12.8, 6))
sns.distplot(final_set.P_incidence)
ax_hist.axvline(P_incidence_mean, color = 'r', linestyle = '--', label = 'Mean')
<matplotlib.lines.Line2D at 0x7faaef7f0370>
P_radious_mean = final_set.P_radius.mean()
fig, ax_hist = plt.subplots(figsize = (10, 5))
sns.distplot(final_set.P_radius)
ax_hist.axvline(P_radious_mean, color = 'r', linestyle = '--', label = 'Mean')
<matplotlib.lines.Line2D at 0x7fab02e5ec10>
P_tilt_mean = final_set.P_tilt.mean()
fig, ax_hist = plt.subplots(figsize = (10,5))
sns.distplot(final_set.P_tilt)
ax_hist.axvline(P_tilt_mean, color = 'r', linestyle = '--', label = 'Mean')
<matplotlib.lines.Line2D at 0x7fab02ef4d30>
S_slope_mean = final_set.S_slope.mean()
fig, ax_hist = plt.subplots(figsize = (10,5))
sns.distplot(final_set.S_slope)
ax_hist.axvline(S_slope_mean, color = 'r', linestyle = '--', label = 'Mean')
<matplotlib.lines.Line2D at 0x7fab03a9e190>
L_angle_mean = final_set.L_angle.mean()
fig, ax_hist = plt.subplots(figsize = (10,5))
sns.distplot(final_set.L_angle)
ax_hist.axvline(L_angle_mean, color = 'r', linestyle = '--', label = 'Mean')
<matplotlib.lines.Line2D at 0x7faae6d19ac0>
S_Degree_mean = final_set.S_Degree.mean()
fig, ax_hist = plt.subplots(figsize = (10,5))
sns.distplot(final_set.S_Degree)
ax_hist.axvline(S_Degree_mean, color = 'r', linestyle = '--', label = 'Mean')
<matplotlib.lines.Line2D at 0x7fab02ebbd90>
fig = px.scatter(x = final_set.P_incidence, y = final_set.S_Degree, color=final_set.Class)
fig.show()
Making checks for Outliers that might hamper our model and remove them.
s = sns.boxplot(x=final_set['P_incidence'],y=final_set['Class'])
final_set[final_set['P_incidence'] > 100]
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 215 | 129.834041 | 8.404475 | 48.384057 | 121.429566 | 107.690466 | 418.543082 | Type_S |
| 262 | 118.144655 | 38.449501 | 50.838520 | 79.695154 | 81.024541 | 74.043767 | Type_S |
| 263 | 115.923261 | 37.515436 | 76.800000 | 78.407825 | 104.698603 | 81.198927 | Type_S |
s = sns.boxplot(x=final_set['S_Degree'],y=final_set['Class'])
final_set[final_set['S_Degree'] > 300]
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 215 | 129.834041 | 8.404475 | 48.384057 | 121.429566 | 107.690466 | 418.543082 | Type_S |
s = sns.boxplot(x=final_set['S_slope'],y=final_set['Class'])
final_set[final_set['S_slope'] > 120]
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 215 | 129.834041 | 8.404475 | 48.384057 | 121.429566 | 107.690466 | 418.543082 | Type_S |
s = sns.boxplot(x=final_set['P_tilt'],y=final_set['Class'])
Observation point: P-tilt and S-degree are the only ones with -ive data.
s = sns.boxplot(x=final_set['P_radius'],y=final_set['Class'])
final_set[final_set['P_radius'] > 150]
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 183 | 81.10410 | 24.794168 | 77.887020 | 56.309932 | 151.839857 | 65.214616 | Type_S |
| 185 | 45.44375 | 9.906072 | 45.000000 | 35.537678 | 163.071041 | 20.315315 | Type_S |
| 280 | 37.90391 | 4.479099 | 24.710274 | 33.424811 | 157.848799 | 33.607027 | Type_S |
s = sns.boxplot(x=final_set['L_angle'],y=final_set['Class'])
final_set[final_set['L_angle'] > 100]
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | Class | |
|---|---|---|---|---|---|---|---|
| 242 | 85.290173 | 18.278890 | 100.744220 | 67.011283 | 110.660700 | 58.884948 | Type_S |
| 297 | 58.828379 | 37.577873 | 125.742385 | 21.250506 | 135.629418 | 117.314683 | Type_S |
Data present in row 215 has produced many outliers in fields P_incidene, S_Slope, S_Degree. Hence we delete this record.
final_set.drop(index=215, inplace=True)
final_set.count()
P_incidence 309 P_tilt 309 L_angle 309 S_slope 309 P_radius 309 S_Degree 309 Class 309 dtype: int64
correlation = final_set.corr()
plt.figure(figsize = (15, 10))
plt.xticks(fontsize = 15, rotation = 45)
plt.yticks(fontsize = 15)
plt.title('Correlation HeatMap', fontsize = 15)
sns.heatmap(correlation, annot = True, cmap = 'YlGnBu')
<AxesSubplot:title={'center':'Correlation HeatMap'}>
Observations:
Change variable type from Object to categorical.
From corelation table,
# dropping P-Radious ---> done after regular modelling below
First, we'll converting the Object variable to the Categorical Variable.
final_set.dtypes
P_incidence float64 P_tilt float64 L_angle float64 S_slope float64 P_radius float64 S_Degree float64 Class object dtype: object
final_set['Class'] = final_set['Class'].astype('category')
final_set.dtypes
P_incidence float64 P_tilt float64 L_angle float64 S_slope float64 P_radius float64 S_Degree float64 Class category dtype: object
#Copying the dataset for encoding.
final_set_encoded = final_set.copy(deep = True)
final_set_encoded.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 309 entries, 0 to 309 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 P_incidence 309 non-null float64 1 P_tilt 309 non-null float64 2 L_angle 309 non-null float64 3 S_slope 309 non-null float64 4 P_radius 309 non-null float64 5 S_Degree 309 non-null float64 6 Class 309 non-null category dtypes: category(1), float64(6) memory usage: 27.3 KB
enc = LabelEncoder()
final_set_encoded['Class'] = enc.fit_transform(final_set_encoded['Class']).astype(int)
final_set_encoded.dtypes
P_incidence float64 P_tilt float64 L_angle float64 S_slope float64 P_radius float64 S_Degree float64 Class int64 dtype: object
# dividing data into Predicator(X) vs Target(y)-
X = final_set_encoded.drop('Class',axis = 1)
print('Sample Predicator data:\n',X.head(5))
y = final_set_encoded['Class']
print('\nSample Traget data:\n',y.head(5))
Sample Predicator data:
P_incidence P_tilt L_angle S_slope P_radius S_Degree
0 38.505273 16.964297 35.112814 21.540976 127.632875 7.986683
1 54.920858 18.968430 51.601455 35.952428 125.846646 2.001642
2 44.362490 8.945435 46.902096 35.417055 129.220682 4.994195
3 48.318931 17.452121 48.000000 30.866809 128.980308 -0.910941
4 45.701789 10.659859 42.577846 35.041929 130.178314 -3.388910
Sample Traget data:
0 0
1 0
2 0
3 0
4 0
Name: Class, dtype: int64
X.corrwith(y).plot.bar(grid = True)
plt.title('Correlation with Class', fontsize = 20)
Text(0.5, 1.0, 'Correlation with Class')
Using Standard Scaler to standardize the values of each column. This is required in order to bring the input variables on same scale, which might be on different scales in the raw form
X_std = pd.DataFrame(StandardScaler().fit_transform(X))
X_std.columns = X.columns #the column names are passed to the standardized dataframe
X_std.head()
| P_incidence | P_tilt | L_angle | S_slope | P_radius | S_Degree | |
|---|---|---|---|---|---|---|
| 0 | -1.297382 | -0.060842 | -0.907111 | -1.671791 | 0.727500 | -0.564534 |
| 1 | -0.318961 | 0.139676 | -0.018377 | -0.533124 | 0.593244 | -0.762811 |
| 2 | -0.948273 | -0.863147 | -0.271672 | -0.575424 | 0.846843 | -0.663672 |
| 3 | -0.712457 | -0.012034 | -0.212495 | -0.934945 | 0.828776 | -0.859301 |
| 4 | -0.868447 | -0.691615 | -0.504747 | -0.605063 | 0.918821 | -0.941394 |
X_std.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| P_incidence | 309.0 | 7.114050e-17 | 1.001622 | -2.033920 | -0.825260 | -0.099700 | 0.737387 | 3.449381 |
| P_tilt | 309.0 | 9.651575e-17 | 1.001622 | -2.413996 | -0.688730 | -0.115554 | 0.461181 | 3.187612 |
| L_angle | 309.0 | 2.705495e-16 | 1.001622 | -2.045086 | -0.805392 | -0.116794 | 0.596001 | 3.977800 |
| S_slope | 309.0 | -3.848055e-16 | 1.001622 | -2.317633 | -0.739479 | -0.025779 | 0.778226 | 2.923046 |
| P_radius | 309.0 | 6.445761e-16 | 1.001622 | -3.598089 | -0.544462 | 0.029280 | 0.565699 | 3.391100 |
| S_Degree | 309.0 | 3.245336e-16 | 1.001622 | -1.195467 | -0.776291 | -0.449361 | 0.525211 | 4.098903 |
# Splitting data in train and test
X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size = 0.3, random_state = 10)
X_train.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 216 entries, 210 to 265 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 P_incidence 216 non-null float64 1 P_tilt 216 non-null float64 2 L_angle 216 non-null float64 3 S_slope 216 non-null float64 4 P_radius 216 non-null float64 5 S_Degree 216 non-null float64 dtypes: float64(6) memory usage: 11.8 KB
y_train.value_counts()
2 100 0 71 1 45 Name: Class, dtype: int64
y_test.value_counts()
2 49 0 29 1 15 Name: Class, dtype: int64
# check for the Target balancing
print('\nTarget balancing between train and test data: \n')
print('Ratio of Class = 0: ', (y_test == 0).count() / (y_train == 0).count())
print('Ratio of Class = 1: ', (y_test == 1).count() / (y_train == 1).count())
print('Ratio of Class = 2: ', (y_test == 2).count() / (y_train == 2).count())
Target balancing between train and test data: Ratio of Class = 0: 0.4305555555555556 Ratio of Class = 1: 0.4305555555555556 Ratio of Class = 2: 0.4305555555555556
Observation: For target balancing
# checking shape of train and test data:
a, b = X_train.shape
print('The training set comprises of', a, 'rows and', b, 'columns.')
c, d = X_test.shape
print('The testing set comprises of', c, 'rows and', d, 'columns.')
The training set comprises of 216 rows and 6 columns. The testing set comprises of 93 rows and 6 columns.
# We'll start with basic KNN model with n=3 and then later, we'll find the best neighbour and model accuracies.
NHH1 = KNeighborsClassifier(n_neighbors=3, weights='distance')
NHH1.fit(X_train,y_train)
KNeighborsClassifier(n_neighbors=3, weights='distance')
# checking predictions
predicted_lab = NHH1.predict(X_test)
NHH1.score(X_test,y_test)
0.7634408602150538
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
y_pred = NHH1.predict(X_test)
confusion_matrix(y_test, y_pred)
array([[17, 11, 1],
[ 4, 11, 0],
[ 4, 2, 43]])
Observation:
# Now, we'll be automating the task to find the best fit Model and neighbour for the KNN model.
# odd list for Neighbour range:
myList = list(range(1, 30))
# selecting only odd values to pass:
neighbors = list(filter(lambda x: x % 2 != 0, myList))
accuracy_scores = [] # empty list to hold thre result.
for k in neighbors:
knn = KNeighborsClassifier(n_neighbors = k)
knn.fit(X_train, y_train)
# predict the response
y_predictKNN = knn.predict(X_test)
# evaluate accuracy
scores = accuracy_score(y_test, y_predictKNN)
accuracy_scores.append(scores)
# changing to misclassification error
MSE = [1 - x for x in accuracy_scores]
# Best k value: we found the best value of 'k' with the help of Misclassification error.
optimal_k = neighbors[MSE.index(min(MSE))]
print('The best value of neighbors is: ', optimal_k)
The best value of neighbors is: 5
#Using optimal_k from above as the final model for prediction
knn = KNeighborsClassifier(n_neighbors = optimal_k)
#Fit on train data
knn.fit(X_train, y_train)
#fit on test data
y_predictKNN = knn.predict(X_test)
# evaluate accuracy
print('\nAccuracy Score of Training Data: ', knn.score(X_train, y_train))
modelKNN_score = accuracy_score(y_test,y_predictKNN)
cfm_KNN = confusion_matrix(y_test, y_predictKNN)
print('Accuracy Score of Test Data: ', modelKNN_score)
print('\nClassification Report of KNN Model:\n ', classification_report(y_test, y_predictKNN))
print('\nConfusion Matrix for our Model:\n', cfm_KNN)
#visualization of confusion matrix in the form of a heatmap
plt.figure(figsize = (12, 8))
sns.heatmap(cfm_KNN, annot = True, cmap = 'YlGnBu', fmt = 'd')
plt.xlabel('Actual Classes', fontsize = 15)
plt.ylabel('Predicted Classes', fontsize = 15)
plt.title('Confusion Matrix HeatMap', fontsize = 15)
Accuracy Score of Training Data: 0.8796296296296297
Accuracy Score of Test Data: 0.8064516129032258
Classification Report of KNN Model:
precision recall f1-score support
0 0.72 0.72 0.72 29
1 0.52 0.73 0.61 15
2 1.00 0.88 0.93 49
accuracy 0.81 93
macro avg 0.75 0.78 0.76 93
weighted avg 0.84 0.81 0.82 93
Confusion Matrix for our Model:
[[21 8 0]
[ 4 11 0]
[ 4 2 43]]
Text(0.5, 1.0, 'Confusion Matrix HeatMap')
from sklearn.metrics import precision_recall_fscore_support, mean_absolute_error
precision_KNN, recall_KNN, f1_score_KNN, support = precision_recall_fscore_support(y_test, y_predictKNN, average = 'macro')
print('Precision Score :', '%0.2f' % precision_KNN)
print('Recall Score :', '%0.2f' % recall_KNN)
print('F1-Score :', '%0.2f' % f1_score_KNN)
print('Accuracy Score :','%0.2f' % modelKNN_score)
Precision Score : 0.75 Recall Score : 0.78 F1-Score : 0.76 Accuracy Score : 0.81
Observations:
One more observation that we can make here
f1 = final_set_encoded.copy(deep=True)
f1.drop(columns='P_radius',inplace=True)
f1.head(5)
| P_incidence | P_tilt | L_angle | S_slope | S_Degree | Class | |
|---|---|---|---|---|---|---|
| 0 | 38.505273 | 16.964297 | 35.112814 | 21.540976 | 7.986683 | 0 |
| 1 | 54.920858 | 18.968430 | 51.601455 | 35.952428 | 2.001642 | 0 |
| 2 | 44.362490 | 8.945435 | 46.902096 | 35.417055 | 4.994195 | 0 |
| 3 | 48.318931 | 17.452121 | 48.000000 | 30.866809 | -0.910941 | 0 |
| 4 | 45.701789 | 10.659859 | 42.577846 | 35.041929 | -3.388910 | 0 |
# splitting the data as A = X and B = y
A = f1.drop('Class',axis = 1)
B = f1['Class']
# normalizing the data with z-score
f1 = f1.apply(zscore)
f1.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| P_incidence | 309.0 | 7.114050e-17 | 1.001622 | -2.033920 | -0.825260 | -0.099700 | 0.737387 | 3.449381 |
| P_tilt | 309.0 | 9.651575e-17 | 1.001622 | -2.413996 | -0.688730 | -0.115554 | 0.461181 | 3.187612 |
| L_angle | 309.0 | 2.705495e-16 | 1.001622 | -2.045086 | -0.805392 | -0.116794 | 0.596001 | 3.977800 |
| S_slope | 309.0 | -3.848055e-16 | 1.001622 | -2.317633 | -0.739479 | -0.025779 | 0.778226 | 2.923046 |
| S_Degree | 309.0 | 3.245336e-16 | 1.001622 | -1.195467 | -0.776291 | -0.449361 | 0.525211 | 4.098903 |
| Class | 309.0 | 1.564373e-15 | 1.001622 | -1.311259 | -1.311259 | -0.179474 | 0.952311 | 0.952311 |
# splitting data for train and test
X_train, X_test, y_train, y_test = train_test_split(A, B, test_size = 0.3, random_state = 10)
# fitting the KNN model for k=5 (proven best fit above)
NHH1 = KNeighborsClassifier(n_neighbors=5, weights='distance')
NHH1.fit(X_train,y_train)
KNeighborsClassifier(weights='distance')
predicted_lab = NHH1.predict(X_test)
NHH1.score(X_test,y_test)
0.8279569892473119
precision_KNN, recall_KNN, f1_score_KNN, support = precision_recall_fscore_support(y_test, predicted_lab, average = 'macro')
print('Precision Score :', '%0.2f' % precision_KNN)
print('Recall Score :', '%0.2f' % recall_KNN)
print('F1-Score :', '%0.2f' % f1_score_KNN)
print('Accuracy Score :','%0.2f' % modelKNN_score)
Precision Score : 0.78 Recall Score : 0.81 F1-Score : 0.78 Accuracy Score : 0.81
Observation:
For model with only 5 variables, the Score comes to be 82.79%.
Also, there's improvement in Precision from 0.75 to 0.78.
Observations:
Suggestion:
• CONTEXT: A bank X is on a massive digital transformation for all its departments. Bank has a growing customer base whee majority of them are liability customers (depositors) vs borrowers (asset customers). The bank is interested in expanding the borrowers base rapidly to bring in more business via loan interests. A campaign that the bank ran in last quarter showed an average single digit conversion rate. Digital transformation being the core strength of the business strategy, marketing department wants to devise effective campaigns with better target marketing to increase the conversion ratio to double digit with same budget as per last campaign.
# We have already imported all the necessary libraries above, we'll import more when needed
# importing data from file 1
Part2_1 = pd.read_csv('Part2 - Data1.csv')
Part2_1.head(5)
| ID | Age | CustomerSince | HighestSpend | ZipCode | HiddenScore | MonthlyAverageSpend | Level | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 25 | 1 | 49 | 91107 | 4 | 1.6 | 1 |
| 1 | 2 | 45 | 19 | 34 | 90089 | 3 | 1.5 | 1 |
| 2 | 3 | 39 | 15 | 11 | 94720 | 1 | 1.0 | 1 |
| 3 | 4 | 35 | 9 | 100 | 94112 | 1 | 2.7 | 2 |
| 4 | 5 | 35 | 8 | 45 | 91330 | 4 | 1.0 | 2 |
# importing data from file 2
Part2_2 = pd.read_csv('Part2 -Data2.csv')
Part2_2.head(5)
| ID | Mortgage | Security | FixedDepositAccount | InternetBanking | CreditCard | LoanOnCard | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 1 | 0 | 0 | 0 | NaN |
| 1 | 2 | 0 | 1 | 0 | 0 | 0 | NaN |
| 2 | 3 | 0 | 0 | 0 | 0 | 0 | NaN |
| 3 | 4 | 0 | 0 | 0 | 0 | 0 | NaN |
| 4 | 5 | 0 | 0 | 0 | 0 | 1 | NaN |
Part2_1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5000 entries, 0 to 4999 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 5000 non-null int64 1 Age 5000 non-null int64 2 CustomerSince 5000 non-null int64 3 HighestSpend 5000 non-null int64 4 ZipCode 5000 non-null int64 5 HiddenScore 5000 non-null int64 6 MonthlyAverageSpend 5000 non-null float64 7 Level 5000 non-null int64 dtypes: float64(1), int64(7) memory usage: 312.6 KB
Part2_2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5000 entries, 0 to 4999 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 5000 non-null int64 1 Mortgage 5000 non-null int64 2 Security 5000 non-null int64 3 FixedDepositAccount 5000 non-null int64 4 InternetBanking 5000 non-null int64 5 CreditCard 5000 non-null int64 6 LoanOnCard 4980 non-null float64 dtypes: float64(1), int64(6) memory usage: 273.6 KB
print('Shape of dataset 1 :',Part2_1.shape)
print('Shape of dataset 2 :',Part2_2.shape)
Shape of dataset 1 : (5000, 8) Shape of dataset 2 : (5000, 7)
Observations:
# mergign of the dataset.
Data = pd.merge(Part2_1,Part2_2,on='ID')
Data.head(10)
| ID | Age | CustomerSince | HighestSpend | ZipCode | HiddenScore | MonthlyAverageSpend | Level | Mortgage | Security | FixedDepositAccount | InternetBanking | CreditCard | LoanOnCard | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 25 | 1 | 49 | 91107 | 4 | 1.6 | 1 | 0 | 1 | 0 | 0 | 0 | NaN |
| 1 | 2 | 45 | 19 | 34 | 90089 | 3 | 1.5 | 1 | 0 | 1 | 0 | 0 | 0 | NaN |
| 2 | 3 | 39 | 15 | 11 | 94720 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN |
| 3 | 4 | 35 | 9 | 100 | 94112 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | NaN |
| 4 | 5 | 35 | 8 | 45 | 91330 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 1 | NaN |
| 5 | 6 | 37 | 13 | 29 | 92121 | 4 | 0.4 | 2 | 155 | 0 | 0 | 1 | 0 | NaN |
| 6 | 7 | 53 | 27 | 72 | 91711 | 2 | 1.5 | 2 | 0 | 0 | 0 | 1 | 0 | NaN |
| 7 | 8 | 50 | 24 | 22 | 93943 | 1 | 0.3 | 3 | 0 | 0 | 0 | 0 | 1 | NaN |
| 8 | 9 | 35 | 10 | 81 | 90089 | 3 | 0.6 | 2 | 104 | 0 | 0 | 1 | 0 | NaN |
| 9 | 10 | 34 | 9 | 180 | 93023 | 1 | 8.9 | 3 | 0 | 0 | 0 | 0 | 0 | 1.0 |
Data.shape
(5000, 14)
Data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 5000 entries, 0 to 4999 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 5000 non-null int64 1 Age 5000 non-null int64 2 CustomerSince 5000 non-null int64 3 HighestSpend 5000 non-null int64 4 ZipCode 5000 non-null int64 5 HiddenScore 5000 non-null int64 6 MonthlyAverageSpend 5000 non-null float64 7 Level 5000 non-null int64 8 Mortgage 5000 non-null int64 9 Security 5000 non-null int64 10 FixedDepositAccount 5000 non-null int64 11 InternetBanking 5000 non-null int64 12 CreditCard 5000 non-null int64 13 LoanOnCard 4980 non-null float64 dtypes: float64(2), int64(12) memory usage: 585.9 KB
Data.isnull().value_counts().to_frame().T
| ID | False | |
|---|---|---|
| Age | False | |
| CustomerSince | False | |
| HighestSpend | False | |
| ZipCode | False | |
| HiddenScore | False | |
| MonthlyAverageSpend | False | |
| Level | False | |
| Mortgage | False | |
| Security | False | |
| FixedDepositAccount | False | |
| InternetBanking | False | |
| CreditCard | False | |
| LoanOnCard | False | True |
| 0 | 4980 | 20 |
Here, we can observe that field 'LoanOnCard' has 20 null values. We'll find which those records are in below exploratoin.
Data.describe(include='all').T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| ID | 5000.0 | 2500.500000 | 1443.520003 | 1.0 | 1250.75 | 2500.5 | 3750.25 | 5000.0 |
| Age | 5000.0 | 45.338400 | 11.463166 | 23.0 | 35.00 | 45.0 | 55.00 | 67.0 |
| CustomerSince | 5000.0 | 20.104600 | 11.467954 | -3.0 | 10.00 | 20.0 | 30.00 | 43.0 |
| HighestSpend | 5000.0 | 73.774200 | 46.033729 | 8.0 | 39.00 | 64.0 | 98.00 | 224.0 |
| ZipCode | 5000.0 | 93152.503000 | 2121.852197 | 9307.0 | 91911.00 | 93437.0 | 94608.00 | 96651.0 |
| HiddenScore | 5000.0 | 2.396400 | 1.147663 | 1.0 | 1.00 | 2.0 | 3.00 | 4.0 |
| MonthlyAverageSpend | 5000.0 | 1.937938 | 1.747659 | 0.0 | 0.70 | 1.5 | 2.50 | 10.0 |
| Level | 5000.0 | 1.881000 | 0.839869 | 1.0 | 1.00 | 2.0 | 3.00 | 3.0 |
| Mortgage | 5000.0 | 56.498800 | 101.713802 | 0.0 | 0.00 | 0.0 | 101.00 | 635.0 |
| Security | 5000.0 | 0.104400 | 0.305809 | 0.0 | 0.00 | 0.0 | 0.00 | 1.0 |
| FixedDepositAccount | 5000.0 | 0.060400 | 0.238250 | 0.0 | 0.00 | 0.0 | 0.00 | 1.0 |
| InternetBanking | 5000.0 | 0.596800 | 0.490589 | 0.0 | 0.00 | 1.0 | 1.00 | 1.0 |
| CreditCard | 5000.0 | 0.294000 | 0.455637 | 0.0 | 0.00 | 0.0 | 1.00 | 1.0 |
| LoanOnCard | 4980.0 | 0.096386 | 0.295149 | 0.0 | 0.00 | 0.0 | 0.00 | 1.0 |
Data.HiddenScore.value_counts().to_frame()
| HiddenScore | |
|---|---|
| 1 | 1472 |
| 2 | 1296 |
| 4 | 1222 |
| 3 | 1010 |
Data.Level.value_counts().to_frame()
| Level | |
|---|---|
| 1 | 2096 |
| 3 | 1501 |
| 2 | 1403 |
Data.MonthlyAverageSpend.unique().sum()
463.07
Data.Security.value_counts().to_frame()
| Security | |
|---|---|
| 0 | 4478 |
| 1 | 522 |
Data.FixedDepositAccount.value_counts().to_frame()
| FixedDepositAccount | |
|---|---|
| 0 | 4698 |
| 1 | 302 |
Data.InternetBanking.value_counts().to_frame()
| InternetBanking | |
|---|---|
| 1 | 2984 |
| 0 | 2016 |
Data.CreditCard.value_counts().to_frame()
| CreditCard | |
|---|---|
| 0 | 3530 |
| 1 | 1470 |
Data.LoanOnCard.value_counts().to_frame()
| LoanOnCard | |
|---|---|
| 0.0 | 4500 |
| 1.0 | 480 |
Observations: These Observations are on the dataset and variables and their division.
Column 'Loan on Card' has some null values --> data not maintained for some users.
Observations on the columns:
Morgage: Continous, Numerical
Data[Data.LoanOnCard.isnull()]
| ID | Age | CustomerSince | HighestSpend | ZipCode | HiddenScore | MonthlyAverageSpend | Level | Mortgage | Security | FixedDepositAccount | InternetBanking | CreditCard | LoanOnCard | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 25 | 1 | 49 | 91107 | 4 | 1.6 | 1 | 0 | 1 | 0 | 0 | 0 | NaN |
| 1 | 2 | 45 | 19 | 34 | 90089 | 3 | 1.5 | 1 | 0 | 1 | 0 | 0 | 0 | NaN |
| 2 | 3 | 39 | 15 | 11 | 94720 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN |
| 3 | 4 | 35 | 9 | 100 | 94112 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | NaN |
| 4 | 5 | 35 | 8 | 45 | 91330 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 1 | NaN |
| 5 | 6 | 37 | 13 | 29 | 92121 | 4 | 0.4 | 2 | 155 | 0 | 0 | 1 | 0 | NaN |
| 6 | 7 | 53 | 27 | 72 | 91711 | 2 | 1.5 | 2 | 0 | 0 | 0 | 1 | 0 | NaN |
| 7 | 8 | 50 | 24 | 22 | 93943 | 1 | 0.3 | 3 | 0 | 0 | 0 | 0 | 1 | NaN |
| 8 | 9 | 35 | 10 | 81 | 90089 | 3 | 0.6 | 2 | 104 | 0 | 0 | 1 | 0 | NaN |
| 79 | 80 | 50 | 26 | 19 | 94720 | 2 | 0.4 | 1 | 118 | 0 | 0 | 1 | 0 | NaN |
| 80 | 81 | 60 | 36 | 41 | 95134 | 4 | 1.3 | 1 | 174 | 0 | 0 | 1 | 1 | NaN |
| 81 | 82 | 47 | 22 | 40 | 94612 | 3 | 2.7 | 2 | 0 | 0 | 0 | 1 | 0 | NaN |
| 82 | 83 | 41 | 16 | 82 | 92507 | 1 | 4.0 | 3 | 0 | 0 | 0 | 1 | 0 | NaN |
| 83 | 84 | 33 | 9 | 50 | 94305 | 1 | 2.4 | 2 | 0 | 0 | 0 | 0 | 0 | NaN |
| 84 | 85 | 46 | 22 | 18 | 91730 | 1 | 0.9 | 3 | 0 | 0 | 0 | 1 | 0 | NaN |
| 85 | 86 | 27 | 2 | 109 | 94005 | 4 | 1.8 | 3 | 0 | 0 | 0 | 0 | 0 | NaN |
| 86 | 87 | 40 | 16 | 42 | 94501 | 4 | 2.2 | 2 | 126 | 0 | 0 | 0 | 0 | NaN |
| 87 | 88 | 48 | 22 | 78 | 94305 | 3 | 1.1 | 1 | 0 | 0 | 0 | 1 | 0 | NaN |
| 88 | 89 | 65 | 41 | 51 | 94117 | 2 | 1.1 | 1 | 0 | 0 | 0 | 1 | 0 | NaN |
| 89 | 90 | 25 | -1 | 113 | 94303 | 4 | 2.3 | 3 | 0 | 0 | 0 | 0 | 1 | NaN |
# we see here that from 5000 entries, we have 20 entries as null for one of the target variables.
#Since the number of null records are less, we'll drop these 20 records.
Data.dropna(inplace=True)
Data.shape
(4980, 14)
Data.isnull().sum()
ID 0 Age 0 CustomerSince 0 HighestSpend 0 ZipCode 0 HiddenScore 0 MonthlyAverageSpend 0 Level 0 Mortgage 0 Security 0 FixedDepositAccount 0 InternetBanking 0 CreditCard 0 LoanOnCard 0 dtype: int64
Data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 4980 entries, 9 to 4999 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 4980 non-null int64 1 Age 4980 non-null int64 2 CustomerSince 4980 non-null int64 3 HighestSpend 4980 non-null int64 4 ZipCode 4980 non-null int64 5 HiddenScore 4980 non-null int64 6 MonthlyAverageSpend 4980 non-null float64 7 Level 4980 non-null int64 8 Mortgage 4980 non-null int64 9 Security 4980 non-null int64 10 FixedDepositAccount 4980 non-null int64 11 InternetBanking 4980 non-null int64 12 CreditCard 4980 non-null int64 13 LoanOnCard 4980 non-null float64 dtypes: float64(2), int64(12) memory usage: 583.6 KB
# we'll now make changes on the variable types.
# We have already noted all the categorical variables above, we'll make the change on datatypes now.
var = ['HiddenScore',
'Level',
'Security',
'FixedDepositAccount',
'InternetBanking',
'CreditCard',
'LoanOnCard']
for v in var:
Data[v] = Data[v].astype('category')
Data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 4980 entries, 9 to 4999 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 4980 non-null int64 1 Age 4980 non-null int64 2 CustomerSince 4980 non-null int64 3 HighestSpend 4980 non-null int64 4 ZipCode 4980 non-null int64 5 HiddenScore 4980 non-null category 6 MonthlyAverageSpend 4980 non-null float64 7 Level 4980 non-null category 8 Mortgage 4980 non-null int64 9 Security 4980 non-null category 10 FixedDepositAccount 4980 non-null category 11 InternetBanking 4980 non-null category 12 CreditCard 4980 non-null category 13 LoanOnCard 4980 non-null category dtypes: category(7), float64(1), int64(6) memory usage: 346.1 KB
# we'll start with Basic EDA:
Data.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| ID | 4980.0 | 2510.345382 | 1438.011129 | 10.0 | 1265.75 | 2510.5 | 3755.250 | 5000.0 |
| Age | 4980.0 | 45.352610 | 11.464212 | 23.0 | 35.00 | 45.0 | 55.000 | 67.0 |
| CustomerSince | 4980.0 | 20.117671 | 11.468716 | -3.0 | 10.00 | 20.0 | 30.000 | 43.0 |
| HighestSpend | 4980.0 | 73.852410 | 46.070090 | 8.0 | 39.00 | 64.0 | 98.000 | 224.0 |
| ZipCode | 4980.0 | 93152.420482 | 2123.660073 | 9307.0 | 91911.00 | 93407.0 | 94608.000 | 96651.0 |
| MonthlyAverageSpend | 4980.0 | 1.939536 | 1.750006 | 0.0 | 0.70 | 1.5 | 2.525 | 10.0 |
| Mortgage | 4980.0 | 56.589759 | 101.836758 | 0.0 | 0.00 | 0.0 | 101.000 | 635.0 |
Observations:
# we'll now start with Uni, Bi and multivariate analysis:
#1. Age
sns.distplot(Data.Age,color='green').set_title('Desnsity plot for Age')
plt.show()
sns.boxplot(Data.Age, color='blue').set_title('Box plot for Age')
plt.show()
Observation:
#2. CustomerSince
sns.distplot(Data.CustomerSince,color='Orange').set_title('Desnsity plot for CustomerSince')
plt.show()
sns.boxplot(Data.CustomerSince, color='brown').set_title('Box plot for CustomerSince')
plt.show()
#3. HighestSpend
sns.distplot(Data.HighestSpend,color='pink').set_title('Desnsity plot for Highest Spent')
plt.show()
plt.figure(figsize = (15, 8))
sns.boxplot(Data.HighestSpend, color='gold').set_title('Box plot for Highest Spent')
plt.show()
# We can see, that there are outliers present here. We'll find the number of outliers.
q1 = np.percentile(Data.HighestSpend,25)
q3 = np.percentile(Data.HighestSpend,75)
iqr = q3-q1
u_f = q3 + 1.5*iqr
print('\nMax wisker is at:',u_f)
print("\nNo. of outiers:",Data.HighestSpend [Data['HighestSpend'] > u_f].count())
Max wisker is at: 186.5 No. of outiers: 96
# We'll not be conducting action on Zipcode as id does not provide much info. --> will drop it.
# 4. Monthly Avg. Spent
sns.distplot(Data.MonthlyAverageSpend,color='green').set_title('Desnsity plot for MonthlyAvgSpent')
plt.show()
plt.figure(figsize = (15, 8))
sns.boxplot(Data.MonthlyAverageSpend, color='purple').set_title('Box plot for MonthlyAvgSpent')
plt.show()
# We can see, that there are outliers present here. We'll find the number of outliers.
q1 = np.percentile(Data.MonthlyAverageSpend,25)
q3 = np.percentile(Data.MonthlyAverageSpend,75)
iqr = q3-q1
u_f = q3 + 1.5*iqr
print('\nMax wisker is at:',u_f)
print("\nNo. of outiers:",Data.MonthlyAverageSpend [Data['MonthlyAverageSpend'] > u_f].count())
Max wisker is at: 5.262499999999999 No. of outiers: 324
# 4. Morgage
plt.figure(figsize = (15, 6))
sns.distplot(Data.Mortgage,color='teal').set_title('Desnsity plot for Morgage')
plt.show()
plt.figure(figsize = (15, 6))
sns.boxplot(Data.Mortgage, color='yellow').set_title('Box plot for Morgage')
plt.show()
# We can see, that there are outliers present here. We'll find the number of outliers.
q1 = np.percentile(Data.Mortgage,25)
q3 = np.percentile(Data.Mortgage,75)
iqr = q3-q1
u_f = q3 + 1.5*iqr
print('\nMax wisker is at:',u_f)
print("\nNo. of outiers:",Data.Mortgage [Data['Mortgage'] > u_f].count())
Max wisker is at: 252.5 No. of outiers: 291
Data.Mortgage.value_counts().head(5)
0 3447 98 17 91 16 83 16 89 16 Name: Mortgage, dtype: int64
# Some analysis on Categorical data:
Data.LoanOnCard.value_counts().plot.pie(autopct='%1.1f%%',shadow=True)
<AxesSubplot:ylabel='LoanOnCard'>
Data.HiddenScore.value_counts().plot.pie(autopct='%1.1f%%',shadow=True)
<AxesSubplot:ylabel='HiddenScore'>
Data.FixedDepositAccount.value_counts().plot.pie(autopct='%1.1f%%',shadow=True)
<AxesSubplot:ylabel='FixedDepositAccount'>
Data.CreditCard.value_counts().plot.pie(autopct='%1.1f%%',shadow=True)
<AxesSubplot:ylabel='CreditCard'>
Data.Level.value_counts().plot.pie(autopct='%1.1f%%',shadow=True)
<AxesSubplot:ylabel='Level'>
Observations:
# we'll have a check on CategoricalvsCategorical as well now:
plt.figure(figsize = (10, 6))
sns.countplot(x=Data.HiddenScore,hue=Data.LoanOnCard)
<AxesSubplot:xlabel='HiddenScore', ylabel='count'>
plt.figure(figsize = (10, 6))
sns.countplot(x=Data.CreditCard,hue=Data.LoanOnCard)
<AxesSubplot:xlabel='CreditCard', ylabel='count'>
plt.figure(figsize = (10, 6))
sns.countplot(x=Data.Security,hue=Data.LoanOnCard)
<AxesSubplot:xlabel='Security', ylabel='count'>
plt.figure(figsize = (10, 6))
sns.countplot(x=Data.Security,hue=Data.CreditCard)
<AxesSubplot:xlabel='Security', ylabel='count'>
Observations:
correlation = Data.corr()
plt.figure(figsize = (15, 10))
plt.xticks(fontsize = 15, rotation = 45)
plt.yticks(fontsize = 15)
plt.title('Correlation HeatMap', fontsize = 15)
sns.heatmap(correlation, annot = True, cmap = 'YlGnBu')
<AxesSubplot:title={'center':'Correlation HeatMap'}>
# from above analysis, EDA and Corr, we found that columns Age, Customer Since and ZIpcode are not much of contributers into our dataset.
# hence, we'll drop these columns for model building.
Data.drop(['Age','CustomerSince','ZipCode'], axis = 1, inplace=True)
Data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 4980 entries, 9 to 4999 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 4980 non-null int64 1 HighestSpend 4980 non-null int64 2 HiddenScore 4980 non-null category 3 MonthlyAverageSpend 4980 non-null float64 4 Level 4980 non-null category 5 Mortgage 4980 non-null int64 6 Security 4980 non-null category 7 FixedDepositAccount 4980 non-null category 8 InternetBanking 4980 non-null category 9 CreditCard 4980 non-null category 10 LoanOnCard 4980 non-null category dtypes: category(7), float64(1), int64(3) memory usage: 389.3 KB
Data.drop('ID',axis=1,inplace=True)
Data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 4980 entries, 9 to 4999 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 HighestSpend 4980 non-null int64 1 HiddenScore 4980 non-null category 2 MonthlyAverageSpend 4980 non-null float64 3 Level 4980 non-null category 4 Mortgage 4980 non-null int64 5 Security 4980 non-null category 6 FixedDepositAccount 4980 non-null category 7 InternetBanking 4980 non-null category 8 CreditCard 4980 non-null category 9 LoanOnCard 4980 non-null category dtypes: category(7), float64(1), int64(2) memory usage: 350.4 KB
# Also, after making checks, we can confirm that out of all categorical values,
# Hidden Score, Level, Fixed Deposit and LoanOnCard are more contributing than Security, InternetBanking and CreditCard.
# Hence, we'll driop these values as well:
Data.drop(['CreditCard','InternetBanking','Security'],axis=1,inplace=True)
Data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 4980 entries, 9 to 4999 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 HighestSpend 4980 non-null int64 1 HiddenScore 4980 non-null category 2 MonthlyAverageSpend 4980 non-null float64 3 Level 4980 non-null category 4 Mortgage 4980 non-null int64 5 FixedDepositAccount 4980 non-null category 6 LoanOnCard 4980 non-null category dtypes: category(4), float64(1), int64(2) memory usage: 335.6 KB
# We'll now fix the outliers and replace them with the mean value for more accurate model developemnt.
# variables with outliers:
col=['HighestSpend','MonthlyAverageSpend','Mortgage']
# magic hapenning below :P
#we'll impute the values of Mean for all the outliers present in 3 variabless
for c in col:
#getting upper lower quartile values
q25,q75=np.percentile(Data[c],25),np.percentile(Data[c],75)
IQR=q75-q25
Threshold=IQR*1.5
lower,upper=q25-Threshold,q75+Threshold
Outliers=[i for i in Data[c] if i < lower or i > upper]
print('{}Number of outliers in {} Before Imputing : {}'.format('\033[1m',c,len(Outliers)))
#taking mean of a column without considering outliers
Data_include = Data.loc[(Data[c] >= lower) & (Data[c] <= upper)]
mean=int(Data_include[c].mean())
print('{}Mean {} is {}'.format('\033[1m',c,mean))
#imputing outliers with mean
Data[c]=np.where(Data[c]>upper,mean,Data[c])
Data[c]=np.where(Data[c]<lower,mean,Data[c])
Outliers=[i for i in Data[c] if i < lower or i > upper]
print('{}Number of outliers in {} After Imputing : {}'.format('\033[1m',c,len(Outliers)))
print('\n')
Number of outliers in HighestSpend Before Imputing : 96 Mean HighestSpend is 71 Number of outliers in HighestSpend After Imputing : 0 Number of outliers in MonthlyAverageSpend Before Imputing : 324 Mean MonthlyAverageSpend is 1 Number of outliers in MonthlyAverageSpend After Imputing : 0 Number of outliers in Mortgage Before Imputing : 291 Mean Mortgage is 38 Number of outliers in Mortgage After Imputing : 0
Data.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| HighestSpend | 4980.0 | 71.468474 | 42.834544 | 8.0 | 39.0 | 64.0 | 93.0 | 185.0 |
| MonthlyAverageSpend | 4980.0 | 1.559255 | 1.160226 | 0.0 | 0.7 | 1.3 | 2.2 | 5.2 |
| Mortgage | 4980.0 | 38.027510 | 66.111473 | 0.0 | 0.0 | 0.0 | 38.0 | 252.0 |
# Now, we'll check for Target imbalancing before splitting:
sns.countplot('LoanOnCard',data=Data)
<AxesSubplot:xlabel='LoanOnCard', ylabel='count'>
Data.LoanOnCard.value_counts().to_frame()
| LoanOnCard | |
|---|---|
| 0.0 | 4500 |
| 1.0 | 480 |
# We can clearly see that the division of data between Loan: Yes or no is not very balanced.
# To balance data for splitting, we'll use Oversampeling method,
# this will not let the important information get removed,compared to undersampling.
# We'll be performing train - test split:
# 1st well do splitting without balancing-
X=Data.drop(columns='LoanOnCard')
y=Data['LoanOnCard']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=10)
y_train.value_counts()
0.0 3153 1.0 333 Name: LoanOnCard, dtype: int64
y_test.value_counts()
0.0 1347 1.0 147 Name: LoanOnCard, dtype: int64
y.value_counts()
0.0 4500 1.0 480 Name: LoanOnCard, dtype: int64
Here, we can see the division of y data along with y_test and y_train data. The number of 0 Cases is way more than number of 1 cases. Hence, We'll perform Smote action to balance the data of Categorical class.
from imblearn.over_sampling import SMOTENC
from imblearn.over_sampling import SMOTE
smote_nc=SMOTENC(categorical_features=[1,3,5],random_state=1) #specifying categorical column numbers
x_s,y_s=smote_nc.fit_resample(X,y)
y_s.value_counts()
1.0 4500 0.0 4500 Name: LoanOnCard, dtype: int64
We can see here now, that the value for both the classes are equaly present. Balancing is now done.
# Again splitting the data on balanced sample
X_train, X_test, y_train, y_test = train_test_split(x_s, y_s, test_size=0.30, random_state=10)
# importing libraries for Logistic Reg and Gaussian model.
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn import model_selection
import warnings
# Logistic regression model
logit = LogisticRegression()
logit.fit(X_train, y_train)
logit_pred = logit.predict(X_test)
logit.score(X_train,y_train)
0.8765079365079365
logit.score(X_test,y_test)
0.88
y_test.value_counts().sum()
2700
print('Report of our Logistic Regression model:\n',
classification_report(y_test,logit_pred))
Report of our Logistic Regression model:
precision recall f1-score support
0.0 0.89 0.87 0.88 1349
1.0 0.87 0.89 0.88 1351
accuracy 0.88 2700
macro avg 0.88 0.88 0.88 2700
weighted avg 0.88 0.88 0.88 2700
cm = confusion_matrix(y_test, logit_pred, labels=[0, 1])
d_cm = pd.DataFrame(cm, index = [i for i in ["Non-Loan Cust.","Loan Cust."]],
columns = [i for i in ["Non-Loan Cust.","Loan Cust."]])
plt.figure(figsize = (7,5))
sns.heatmap(d_cm, annot=True ,fmt='g', cmap = 'YlGnBu')
plt.show()
Observations:
# naive bayes model:
X_train, X_test, y_train, y_test = train_test_split(x_s, y_s, test_size=0.30, random_state=5)
g_m = GaussianNB()
g_m.fit(X_train, y_train.ravel())
g_pred = g_m.predict(X_test)
# training data score
g_m.score(X_train, y_train)
0.8511111111111112
# testing data score
g_m.score(X_test, y_test)
0.8633333333333333
cm1 = confusion_matrix(y_test, g_pred, labels=[0, 1])
d1_cm = pd.DataFrame(cm1, index = [i for i in ["Non-Loan Cust.","Loan Cust."]],
columns = [i for i in ["Non-Loan Cust.","Loan Cust."]])
plt.figure(figsize = (7,5))
sns.heatmap(d1_cm, annot=True ,fmt='g', cmap = 'YlGnBu')
plt.show()
print('Report of our Naive Bayes model:\n',
classification_report(y_test,g_pred))
Report of our Naive Bayes model:
precision recall f1-score support
0.0 0.84 0.89 0.87 1341
1.0 0.88 0.84 0.86 1359
accuracy 0.86 2700
macro avg 0.86 0.86 0.86 2700
weighted avg 0.86 0.86 0.86 2700
Observations:
The Accuracy score of the model is 86%. (which is less than Logit model)
The model predicted 2331 cases correctly from 2700.